diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1,27 +1,50 @@ -[2024-07-05 00:34:00,964][49215] Worker 0 uses CPU cores [0, 1] -[2024-07-05 00:34:00,965][49233] Worker 3 uses CPU cores [6, 7] -[2024-07-05 00:34:00,999][49214] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 00:34:01,000][49214] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 00:34:01,009][49237] Worker 7 uses CPU cores [14, 15] -[2024-07-05 00:34:01,047][49214] Num visible devices: 1 -[2024-07-05 00:34:01,110][49236] Worker 6 uses CPU cores [12, 13] -[2024-07-05 00:34:01,114][49232] Worker 2 uses CPU cores [4, 5] -[2024-07-05 00:34:01,267][49201] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 00:34:01,267][49201] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 00:34:01,314][49201] Num visible devices: 1 -[2024-07-05 00:34:01,333][49201] Starting seed is not provided -[2024-07-05 00:34:01,333][49201] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 00:34:01,333][49201] Initializing actor-critic model on device cuda:0 -[2024-07-05 00:34:01,334][49201] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 00:34:01,334][49201] RunningMeanStd input shape: (1,) -[2024-07-05 00:34:01,341][49201] ConvEncoder: input_channels=3 -[2024-07-05 00:34:01,354][49231] Worker 1 uses CPU cores [2, 3] -[2024-07-05 00:34:01,354][49234] Worker 5 uses CPU cores [10, 11] -[2024-07-05 00:34:01,362][49235] Worker 4 uses CPU cores [8, 9] -[2024-07-05 00:34:01,404][49201] Conv encoder output size: 512 -[2024-07-05 00:34:01,404][49201] Policy head output size: 512 -[2024-07-05 00:34:01,413][49201] Created Actor Critic model with architecture: -[2024-07-05 00:34:01,413][49201] ActorCriticSharedWeights( +[2024-07-05 00:02:20,879][45457] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json... +[2024-07-05 00:02:20,880][45457] Rollout worker 0 uses device cpu +[2024-07-05 00:02:20,881][45457] Rollout worker 1 uses device cpu +[2024-07-05 00:02:20,881][45457] Rollout worker 2 uses device cpu +[2024-07-05 00:02:20,881][45457] Rollout worker 3 uses device cpu +[2024-07-05 00:02:20,881][45457] Rollout worker 4 uses device cpu +[2024-07-05 00:02:20,882][45457] Rollout worker 5 uses device cpu +[2024-07-05 00:02:20,882][45457] Rollout worker 6 uses device cpu +[2024-07-05 00:02:20,882][45457] Rollout worker 7 uses device cpu +[2024-07-05 00:02:20,916][45457] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 00:02:20,917][45457] InferenceWorker_p0-w0: min num requests: 2 +[2024-07-05 00:02:20,945][45457] Starting all processes... +[2024-07-05 00:02:20,945][45457] Starting process learner_proc0 +[2024-07-05 00:02:21,587][45457] Starting all processes... +[2024-07-05 00:02:21,602][45457] Starting process inference_proc0-0 +[2024-07-05 00:02:21,602][45457] Starting process rollout_proc0 +[2024-07-05 00:02:21,603][45457] Starting process rollout_proc1 +[2024-07-05 00:02:21,603][45457] Starting process rollout_proc2 +[2024-07-05 00:02:21,603][45457] Starting process rollout_proc3 +[2024-07-05 00:02:21,604][45457] Starting process rollout_proc4 +[2024-07-05 00:02:21,604][45457] Starting process rollout_proc5 +[2024-07-05 00:02:21,605][45457] Starting process rollout_proc6 +[2024-07-05 00:02:21,605][45457] Starting process rollout_proc7 +[2024-07-05 00:02:24,190][45738] Worker 4 uses CPU cores [8, 9] +[2024-07-05 00:02:24,319][45734] Worker 0 uses CPU cores [0, 1] +[2024-07-05 00:02:24,435][45735] Worker 1 uses CPU cores [2, 3] +[2024-07-05 00:02:24,496][45736] Worker 2 uses CPU cores [4, 5] +[2024-07-05 00:02:24,533][45737] Worker 3 uses CPU cores [6, 7] +[2024-07-05 00:02:24,548][45740] Worker 6 uses CPU cores [12, 13] +[2024-07-05 00:02:24,665][45741] Worker 7 uses CPU cores [14, 15] +[2024-07-05 00:02:24,689][45720] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 00:02:24,689][45720] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-07-05 00:02:24,722][45733] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 00:02:24,722][45733] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-07-05 00:02:24,740][45720] Num visible devices: 1 +[2024-07-05 00:02:24,758][45720] Setting fixed seed 200 +[2024-07-05 00:02:24,769][45720] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 00:02:24,769][45720] Initializing actor-critic model on device cuda:0 +[2024-07-05 00:02:24,769][45720] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 00:02:24,770][45720] RunningMeanStd input shape: (1,) +[2024-07-05 00:02:24,770][45733] Num visible devices: 1 +[2024-07-05 00:02:24,778][45720] Num input channels: 3 +[2024-07-05 00:02:24,790][45720] Convolutional layer output size: 4608 +[2024-07-05 00:02:24,802][45720] Policy head output size: 512 +[2024-07-05 00:02:24,860][45739] Worker 5 uses CPU cores [10, 11] +[2024-07-05 00:02:24,905][45720] Created Actor Critic model with architecture: +[2024-07-05 00:02:24,905][45720] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -31,6082 +54,67 @@ ) (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (basic_encoder): ResnetEncoder( + (conv_head): Sequential( + (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (2): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (3): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 00:34:01,519][49201] Using optimizer -[2024-07-05 00:34:02,109][49201] No checkpoints found -[2024-07-05 00:34:02,109][49201] Did not load from checkpoint, starting from scratch! -[2024-07-05 00:34:02,109][49201] Initialized policy 0 weights for model version 0 -[2024-07-05 00:34:02,111][49201] LearnerWorker_p0 finished initialization! -[2024-07-05 00:34:02,111][49201] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 00:34:02,176][49214] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 00:34:02,177][49214] RunningMeanStd input shape: (1,) -[2024-07-05 00:34:02,185][49214] ConvEncoder: input_channels=3 -[2024-07-05 00:34:02,238][49214] Conv encoder output size: 512 -[2024-07-05 00:34:02,238][49214] Policy head output size: 512 -[2024-07-05 00:34:02,298][49237] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,300][49215] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,300][49233] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,301][49235] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,301][49232] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,302][49234] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,302][49231] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,303][49236] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 00:34:02,803][49215] Decorrelating experience for 0 frames... -[2024-07-05 00:34:02,804][49235] Decorrelating experience for 0 frames... -[2024-07-05 00:34:02,804][49237] Decorrelating experience for 0 frames... -[2024-07-05 00:34:02,804][49232] Decorrelating experience for 0 frames... -[2024-07-05 00:34:02,805][49231] Decorrelating experience for 0 frames... -[2024-07-05 00:34:02,805][49234] Decorrelating experience for 0 frames... -[2024-07-05 00:34:02,965][49232] Decorrelating experience for 32 frames... -[2024-07-05 00:34:02,966][49234] Decorrelating experience for 32 frames... -[2024-07-05 00:34:02,966][49235] Decorrelating experience for 32 frames... -[2024-07-05 00:34:02,966][49237] Decorrelating experience for 32 frames... -[2024-07-05 00:34:02,966][49215] Decorrelating experience for 32 frames... -[2024-07-05 00:34:03,002][49236] Decorrelating experience for 0 frames... -[2024-07-05 00:34:03,002][49233] Decorrelating experience for 0 frames... -[2024-07-05 00:34:03,160][49236] Decorrelating experience for 32 frames... -[2024-07-05 00:34:03,161][49233] Decorrelating experience for 32 frames... -[2024-07-05 00:34:03,161][49231] Decorrelating experience for 32 frames... -[2024-07-05 00:34:03,172][49234] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,172][49232] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,332][49215] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,333][49235] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,360][49232] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,362][49234] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,368][49233] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,369][49236] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,384][49237] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,519][49231] Decorrelating experience for 64 frames... -[2024-07-05 00:34:03,546][49233] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,546][49236] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,558][49237] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,560][49215] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,746][49231] Decorrelating experience for 96 frames... -[2024-07-05 00:34:03,761][49235] Decorrelating experience for 96 frames... -[2024-07-05 00:34:04,174][49201] Signal inference workers to stop experience collection... -[2024-07-05 00:34:04,179][49214] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 00:34:05,345][49201] Signal inference workers to resume experience collection... -[2024-07-05 00:34:05,345][49214] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 00:34:07,010][49214] Updated weights for policy 0, policy_version 10 (0.0095) -[2024-07-05 00:34:09,248][49214] Updated weights for policy 0, policy_version 20 (0.0015) -[2024-07-05 00:34:11,325][49214] Updated weights for policy 0, policy_version 30 (0.0010) -[2024-07-05 00:34:13,160][49214] Updated weights for policy 0, policy_version 40 (0.0011) -[2024-07-05 00:34:14,803][49201] Saving new best policy, reward=4.281! -[2024-07-05 00:34:14,942][49214] Updated weights for policy 0, policy_version 50 (0.0007) -[2024-07-05 00:34:16,694][49214] Updated weights for policy 0, policy_version 60 (0.0009) -[2024-07-05 00:34:18,711][49214] Updated weights for policy 0, policy_version 70 (0.0014) -[2024-07-05 00:34:19,816][49201] Saving new best policy, reward=4.474! -[2024-07-05 00:34:20,572][49214] Updated weights for policy 0, policy_version 80 (0.0019) -[2024-07-05 00:34:22,601][49214] Updated weights for policy 0, policy_version 90 (0.0007) -[2024-07-05 00:34:24,397][49214] Updated weights for policy 0, policy_version 100 (0.0007) -[2024-07-05 00:34:24,803][49201] Saving new best policy, reward=4.698! -[2024-07-05 00:34:26,273][49214] Updated weights for policy 0, policy_version 110 (0.0007) -[2024-07-05 00:34:28,111][49214] Updated weights for policy 0, policy_version 120 (0.0011) -[2024-07-05 00:34:30,190][49214] Updated weights for policy 0, policy_version 130 (0.0019) -[2024-07-05 00:34:32,030][49214] Updated weights for policy 0, policy_version 140 (0.0011) -[2024-07-05 00:34:33,855][49214] Updated weights for policy 0, policy_version 150 (0.0007) -[2024-07-05 00:34:35,681][49214] Updated weights for policy 0, policy_version 160 (0.0010) -[2024-07-05 00:34:37,533][49214] Updated weights for policy 0, policy_version 170 (0.0010) -[2024-07-05 00:34:39,349][49214] Updated weights for policy 0, policy_version 180 (0.0008) -[2024-07-05 00:34:41,250][49214] Updated weights for policy 0, policy_version 190 (0.0011) -[2024-07-05 00:34:43,015][49214] Updated weights for policy 0, policy_version 200 (0.0007) -[2024-07-05 00:34:44,775][49214] Updated weights for policy 0, policy_version 210 (0.0007) -[2024-07-05 00:34:46,546][49214] Updated weights for policy 0, policy_version 220 (0.0007) -[2024-07-05 00:34:48,477][49214] Updated weights for policy 0, policy_version 230 (0.0008) -[2024-07-05 00:34:50,327][49214] Updated weights for policy 0, policy_version 240 (0.0010) -[2024-07-05 00:34:52,167][49214] Updated weights for policy 0, policy_version 250 (0.0011) -[2024-07-05 00:34:54,049][49214] Updated weights for policy 0, policy_version 260 (0.0013) -[2024-07-05 00:34:55,903][49214] Updated weights for policy 0, policy_version 270 (0.0007) -[2024-07-05 00:34:57,787][49214] Updated weights for policy 0, policy_version 280 (0.0007) -[2024-07-05 00:34:59,684][49214] Updated weights for policy 0, policy_version 290 (0.0007) -[2024-07-05 00:34:59,801][49201] Saving new best policy, reward=4.902! -[2024-07-05 00:35:01,611][49214] Updated weights for policy 0, policy_version 300 (0.0007) -[2024-07-05 00:35:03,487][49214] Updated weights for policy 0, policy_version 310 (0.0007) -[2024-07-05 00:35:04,838][49201] Saving new best policy, reward=5.150! -[2024-07-05 00:35:05,423][49214] Updated weights for policy 0, policy_version 320 (0.0017) -[2024-07-05 00:35:07,288][49214] Updated weights for policy 0, policy_version 330 (0.0013) -[2024-07-05 00:35:09,163][49214] Updated weights for policy 0, policy_version 340 (0.0015) -[2024-07-05 00:35:09,802][49201] Saving new best policy, reward=5.423! -[2024-07-05 00:35:11,110][49214] Updated weights for policy 0, policy_version 350 (0.0007) -[2024-07-05 00:35:12,949][49214] Updated weights for policy 0, policy_version 360 (0.0007) -[2024-07-05 00:35:14,796][49214] Updated weights for policy 0, policy_version 370 (0.0010) -[2024-07-05 00:35:14,803][49201] Saving new best policy, reward=6.091! -[2024-07-05 00:35:16,664][49214] Updated weights for policy 0, policy_version 380 (0.0014) -[2024-07-05 00:35:18,517][49214] Updated weights for policy 0, policy_version 390 (0.0014) -[2024-07-05 00:35:19,804][49201] Saving new best policy, reward=6.431! -[2024-07-05 00:35:20,379][49214] Updated weights for policy 0, policy_version 400 (0.0007) -[2024-07-05 00:35:22,203][49214] Updated weights for policy 0, policy_version 410 (0.0007) -[2024-07-05 00:35:24,055][49214] Updated weights for policy 0, policy_version 420 (0.0011) -[2024-07-05 00:35:24,805][49201] Saving new best policy, reward=7.559! -[2024-07-05 00:35:25,955][49214] Updated weights for policy 0, policy_version 430 (0.0011) -[2024-07-05 00:35:27,797][49214] Updated weights for policy 0, policy_version 440 (0.0007) -[2024-07-05 00:35:29,654][49214] Updated weights for policy 0, policy_version 450 (0.0007) -[2024-07-05 00:35:29,829][49201] Saving new best policy, reward=8.770! -[2024-07-05 00:35:31,548][49214] Updated weights for policy 0, policy_version 460 (0.0012) -[2024-07-05 00:35:33,354][49214] Updated weights for policy 0, policy_version 470 (0.0007) -[2024-07-05 00:35:34,863][49201] Saving new best policy, reward=9.763! -[2024-07-05 00:35:35,256][49214] Updated weights for policy 0, policy_version 480 (0.0017) -[2024-07-05 00:35:37,100][49214] Updated weights for policy 0, policy_version 490 (0.0007) -[2024-07-05 00:35:38,928][49214] Updated weights for policy 0, policy_version 500 (0.0010) -[2024-07-05 00:35:39,834][49201] Saving new best policy, reward=11.270! -[2024-07-05 00:35:40,750][49214] Updated weights for policy 0, policy_version 510 (0.0007) -[2024-07-05 00:35:42,640][49214] Updated weights for policy 0, policy_version 520 (0.0011) -[2024-07-05 00:35:44,518][49214] Updated weights for policy 0, policy_version 530 (0.0007) -[2024-07-05 00:35:46,308][49214] Updated weights for policy 0, policy_version 540 (0.0007) -[2024-07-05 00:35:48,091][49214] Updated weights for policy 0, policy_version 550 (0.0007) -[2024-07-05 00:35:49,828][49201] Saving new best policy, reward=11.387! -[2024-07-05 00:35:49,829][49214] Updated weights for policy 0, policy_version 560 (0.0010) -[2024-07-05 00:35:51,627][49214] Updated weights for policy 0, policy_version 570 (0.0010) -[2024-07-05 00:35:53,431][49214] Updated weights for policy 0, policy_version 580 (0.0007) -[2024-07-05 00:35:54,851][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000000588_2408448.pth... -[2024-07-05 00:35:54,912][49201] Saving new best policy, reward=13.066! -[2024-07-05 00:35:55,233][49214] Updated weights for policy 0, policy_version 590 (0.0007) -[2024-07-05 00:35:57,022][49214] Updated weights for policy 0, policy_version 600 (0.0014) -[2024-07-05 00:35:58,810][49214] Updated weights for policy 0, policy_version 610 (0.0010) -[2024-07-05 00:35:59,801][49201] Saving new best policy, reward=15.449! -[2024-07-05 00:36:00,590][49214] Updated weights for policy 0, policy_version 620 (0.0007) -[2024-07-05 00:36:02,373][49214] Updated weights for policy 0, policy_version 630 (0.0007) -[2024-07-05 00:36:04,213][49214] Updated weights for policy 0, policy_version 640 (0.0008) -[2024-07-05 00:36:04,803][49201] Saving new best policy, reward=16.120! -[2024-07-05 00:36:06,018][49214] Updated weights for policy 0, policy_version 650 (0.0008) -[2024-07-05 00:36:07,860][49214] Updated weights for policy 0, policy_version 660 (0.0010) -[2024-07-05 00:36:09,763][49214] Updated weights for policy 0, policy_version 670 (0.0007) -[2024-07-05 00:36:09,802][49201] Saving new best policy, reward=20.173! -[2024-07-05 00:36:11,605][49214] Updated weights for policy 0, policy_version 680 (0.0013) -[2024-07-05 00:36:13,417][49214] Updated weights for policy 0, policy_version 690 (0.0010) -[2024-07-05 00:36:15,318][49214] Updated weights for policy 0, policy_version 700 (0.0011) -[2024-07-05 00:36:17,177][49214] Updated weights for policy 0, policy_version 710 (0.0010) -[2024-07-05 00:36:18,980][49214] Updated weights for policy 0, policy_version 720 (0.0011) -[2024-07-05 00:36:20,761][49214] Updated weights for policy 0, policy_version 730 (0.0006) -[2024-07-05 00:36:22,528][49214] Updated weights for policy 0, policy_version 740 (0.0007) -[2024-07-05 00:36:24,332][49214] Updated weights for policy 0, policy_version 750 (0.0007) -[2024-07-05 00:36:26,119][49214] Updated weights for policy 0, policy_version 760 (0.0010) -[2024-07-05 00:36:27,900][49214] Updated weights for policy 0, policy_version 770 (0.0007) -[2024-07-05 00:36:29,685][49214] Updated weights for policy 0, policy_version 780 (0.0013) -[2024-07-05 00:36:31,477][49214] Updated weights for policy 0, policy_version 790 (0.0010) -[2024-07-05 00:36:33,357][49214] Updated weights for policy 0, policy_version 800 (0.0007) -[2024-07-05 00:36:34,863][49201] Saving new best policy, reward=22.368! -[2024-07-05 00:36:35,239][49214] Updated weights for policy 0, policy_version 810 (0.0011) -[2024-07-05 00:36:37,014][49214] Updated weights for policy 0, policy_version 820 (0.0010) -[2024-07-05 00:36:38,854][49214] Updated weights for policy 0, policy_version 830 (0.0012) -[2024-07-05 00:36:40,659][49214] Updated weights for policy 0, policy_version 840 (0.0018) -[2024-07-05 00:36:42,480][49214] Updated weights for policy 0, policy_version 850 (0.0009) -[2024-07-05 00:36:44,232][49214] Updated weights for policy 0, policy_version 860 (0.0007) -[2024-07-05 00:36:46,068][49214] Updated weights for policy 0, policy_version 870 (0.0007) -[2024-07-05 00:36:47,608][49214] Updated weights for policy 0, policy_version 880 (0.0007) -[2024-07-05 00:36:49,028][49214] Updated weights for policy 0, policy_version 890 (0.0007) -[2024-07-05 00:36:49,801][49201] Saving new best policy, reward=22.511! -[2024-07-05 00:36:50,446][49214] Updated weights for policy 0, policy_version 900 (0.0007) -[2024-07-05 00:36:51,841][49214] Updated weights for policy 0, policy_version 910 (0.0006) -[2024-07-05 00:36:53,257][49214] Updated weights for policy 0, policy_version 920 (0.0006) -[2024-07-05 00:36:54,689][49214] Updated weights for policy 0, policy_version 930 (0.0007) -[2024-07-05 00:36:56,109][49214] Updated weights for policy 0, policy_version 940 (0.0007) -[2024-07-05 00:36:57,544][49214] Updated weights for policy 0, policy_version 950 (0.0007) -[2024-07-05 00:36:58,955][49214] Updated weights for policy 0, policy_version 960 (0.0006) -[2024-07-05 00:36:59,822][49201] Saving new best policy, reward=22.819! -[2024-07-05 00:37:00,404][49214] Updated weights for policy 0, policy_version 970 (0.0007) -[2024-07-05 00:37:01,844][49214] Updated weights for policy 0, policy_version 980 (0.0007) -[2024-07-05 00:37:03,279][49214] Updated weights for policy 0, policy_version 990 (0.0006) -[2024-07-05 00:37:04,782][49214] Updated weights for policy 0, policy_version 1000 (0.0007) -[2024-07-05 00:37:06,272][49214] Updated weights for policy 0, policy_version 1010 (0.0007) -[2024-07-05 00:37:07,730][49214] Updated weights for policy 0, policy_version 1020 (0.0007) -[2024-07-05 00:37:09,181][49214] Updated weights for policy 0, policy_version 1030 (0.0007) -[2024-07-05 00:37:10,651][49214] Updated weights for policy 0, policy_version 1040 (0.0007) -[2024-07-05 00:37:12,094][49214] Updated weights for policy 0, policy_version 1050 (0.0007) -[2024-07-05 00:37:13,535][49214] Updated weights for policy 0, policy_version 1060 (0.0007) -[2024-07-05 00:37:14,984][49214] Updated weights for policy 0, policy_version 1070 (0.0007) -[2024-07-05 00:37:16,439][49214] Updated weights for policy 0, policy_version 1080 (0.0007) -[2024-07-05 00:37:17,823][49214] Updated weights for policy 0, policy_version 1090 (0.0007) -[2024-07-05 00:37:19,227][49214] Updated weights for policy 0, policy_version 1100 (0.0007) -[2024-07-05 00:37:20,635][49214] Updated weights for policy 0, policy_version 1110 (0.0007) -[2024-07-05 00:37:22,031][49214] Updated weights for policy 0, policy_version 1120 (0.0006) -[2024-07-05 00:37:23,458][49214] Updated weights for policy 0, policy_version 1130 (0.0007) -[2024-07-05 00:37:24,863][49214] Updated weights for policy 0, policy_version 1140 (0.0006) -[2024-07-05 00:37:26,264][49214] Updated weights for policy 0, policy_version 1150 (0.0007) -[2024-07-05 00:37:27,648][49214] Updated weights for policy 0, policy_version 1160 (0.0007) -[2024-07-05 00:37:29,099][49214] Updated weights for policy 0, policy_version 1170 (0.0006) -[2024-07-05 00:37:30,601][49214] Updated weights for policy 0, policy_version 1180 (0.0007) -[2024-07-05 00:37:32,003][49214] Updated weights for policy 0, policy_version 1190 (0.0006) -[2024-07-05 00:37:33,392][49214] Updated weights for policy 0, policy_version 1200 (0.0007) -[2024-07-05 00:37:34,770][49214] Updated weights for policy 0, policy_version 1210 (0.0006) -[2024-07-05 00:37:36,259][49214] Updated weights for policy 0, policy_version 1220 (0.0007) -[2024-07-05 00:37:37,694][49214] Updated weights for policy 0, policy_version 1230 (0.0007) -[2024-07-05 00:37:39,132][49214] Updated weights for policy 0, policy_version 1240 (0.0007) -[2024-07-05 00:37:40,527][49214] Updated weights for policy 0, policy_version 1250 (0.0006) -[2024-07-05 00:37:41,946][49214] Updated weights for policy 0, policy_version 1260 (0.0007) -[2024-07-05 00:37:43,353][49214] Updated weights for policy 0, policy_version 1270 (0.0007) -[2024-07-05 00:37:44,832][49201] Saving new best policy, reward=23.004! -[2024-07-05 00:37:44,833][49214] Updated weights for policy 0, policy_version 1280 (0.0006) -[2024-07-05 00:37:46,426][49214] Updated weights for policy 0, policy_version 1290 (0.0007) -[2024-07-05 00:37:47,890][49214] Updated weights for policy 0, policy_version 1300 (0.0007) -[2024-07-05 00:37:49,325][49214] Updated weights for policy 0, policy_version 1310 (0.0007) -[2024-07-05 00:37:49,801][49201] Saving new best policy, reward=24.526! -[2024-07-05 00:37:50,775][49214] Updated weights for policy 0, policy_version 1320 (0.0007) -[2024-07-05 00:37:52,226][49214] Updated weights for policy 0, policy_version 1330 (0.0007) -[2024-07-05 00:37:53,772][49214] Updated weights for policy 0, policy_version 1340 (0.0007) -[2024-07-05 00:37:54,817][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000001347_5517312.pth... -[2024-07-05 00:37:55,260][49214] Updated weights for policy 0, policy_version 1350 (0.0007) -[2024-07-05 00:37:56,718][49214] Updated weights for policy 0, policy_version 1360 (0.0007) -[2024-07-05 00:37:58,165][49214] Updated weights for policy 0, policy_version 1370 (0.0007) -[2024-07-05 00:37:59,627][49214] Updated weights for policy 0, policy_version 1380 (0.0007) -[2024-07-05 00:38:01,143][49214] Updated weights for policy 0, policy_version 1390 (0.0007) -[2024-07-05 00:38:02,624][49214] Updated weights for policy 0, policy_version 1400 (0.0007) -[2024-07-05 00:38:04,061][49214] Updated weights for policy 0, policy_version 1410 (0.0007) -[2024-07-05 00:38:05,471][49214] Updated weights for policy 0, policy_version 1420 (0.0007) -[2024-07-05 00:38:06,892][49214] Updated weights for policy 0, policy_version 1430 (0.0007) -[2024-07-05 00:38:08,341][49214] Updated weights for policy 0, policy_version 1440 (0.0007) -[2024-07-05 00:38:09,755][49214] Updated weights for policy 0, policy_version 1450 (0.0007) -[2024-07-05 00:38:11,197][49214] Updated weights for policy 0, policy_version 1460 (0.0007) -[2024-07-05 00:38:12,607][49214] Updated weights for policy 0, policy_version 1470 (0.0006) -[2024-07-05 00:38:14,040][49214] Updated weights for policy 0, policy_version 1480 (0.0006) -[2024-07-05 00:38:15,459][49214] Updated weights for policy 0, policy_version 1490 (0.0007) -[2024-07-05 00:38:16,863][49214] Updated weights for policy 0, policy_version 1500 (0.0007) -[2024-07-05 00:38:18,316][49214] Updated weights for policy 0, policy_version 1510 (0.0007) -[2024-07-05 00:38:19,742][49214] Updated weights for policy 0, policy_version 1520 (0.0006) -[2024-07-05 00:38:21,168][49214] Updated weights for policy 0, policy_version 1530 (0.0007) -[2024-07-05 00:38:22,586][49214] Updated weights for policy 0, policy_version 1540 (0.0007) -[2024-07-05 00:38:23,991][49214] Updated weights for policy 0, policy_version 1550 (0.0007) -[2024-07-05 00:38:25,403][49214] Updated weights for policy 0, policy_version 1560 (0.0006) -[2024-07-05 00:38:26,818][49214] Updated weights for policy 0, policy_version 1570 (0.0007) -[2024-07-05 00:38:28,240][49214] Updated weights for policy 0, policy_version 1580 (0.0007) -[2024-07-05 00:38:29,656][49214] Updated weights for policy 0, policy_version 1590 (0.0007) -[2024-07-05 00:38:31,094][49214] Updated weights for policy 0, policy_version 1600 (0.0006) -[2024-07-05 00:38:32,510][49214] Updated weights for policy 0, policy_version 1610 (0.0007) -[2024-07-05 00:38:33,923][49214] Updated weights for policy 0, policy_version 1620 (0.0007) -[2024-07-05 00:38:35,340][49214] Updated weights for policy 0, policy_version 1630 (0.0006) -[2024-07-05 00:38:36,737][49214] Updated weights for policy 0, policy_version 1640 (0.0006) -[2024-07-05 00:38:38,165][49214] Updated weights for policy 0, policy_version 1650 (0.0006) -[2024-07-05 00:38:39,579][49214] Updated weights for policy 0, policy_version 1660 (0.0006) -[2024-07-05 00:38:41,004][49214] Updated weights for policy 0, policy_version 1670 (0.0007) -[2024-07-05 00:38:42,416][49214] Updated weights for policy 0, policy_version 1680 (0.0006) -[2024-07-05 00:38:43,846][49214] Updated weights for policy 0, policy_version 1690 (0.0006) -[2024-07-05 00:38:45,286][49214] Updated weights for policy 0, policy_version 1700 (0.0006) -[2024-07-05 00:38:46,728][49214] Updated weights for policy 0, policy_version 1710 (0.0007) -[2024-07-05 00:38:48,131][49214] Updated weights for policy 0, policy_version 1720 (0.0007) -[2024-07-05 00:38:49,523][49214] Updated weights for policy 0, policy_version 1730 (0.0006) -[2024-07-05 00:38:49,801][49201] Saving new best policy, reward=26.917! -[2024-07-05 00:38:50,902][49214] Updated weights for policy 0, policy_version 1740 (0.0007) -[2024-07-05 00:38:52,310][49214] Updated weights for policy 0, policy_version 1750 (0.0007) -[2024-07-05 00:38:53,703][49214] Updated weights for policy 0, policy_version 1760 (0.0006) -[2024-07-05 00:38:55,105][49214] Updated weights for policy 0, policy_version 1770 (0.0007) -[2024-07-05 00:38:56,497][49214] Updated weights for policy 0, policy_version 1780 (0.0006) -[2024-07-05 00:38:57,973][49214] Updated weights for policy 0, policy_version 1790 (0.0007) -[2024-07-05 00:38:59,459][49214] Updated weights for policy 0, policy_version 1800 (0.0007) -[2024-07-05 00:39:00,856][49214] Updated weights for policy 0, policy_version 1810 (0.0006) -[2024-07-05 00:39:02,257][49214] Updated weights for policy 0, policy_version 1820 (0.0006) -[2024-07-05 00:39:03,772][49214] Updated weights for policy 0, policy_version 1830 (0.0007) -[2024-07-05 00:39:05,214][49214] Updated weights for policy 0, policy_version 1840 (0.0007) -[2024-07-05 00:39:06,715][49214] Updated weights for policy 0, policy_version 1850 (0.0007) -[2024-07-05 00:39:08,162][49214] Updated weights for policy 0, policy_version 1860 (0.0007) -[2024-07-05 00:39:09,620][49214] Updated weights for policy 0, policy_version 1870 (0.0007) -[2024-07-05 00:39:11,086][49214] Updated weights for policy 0, policy_version 1880 (0.0007) -[2024-07-05 00:39:12,558][49214] Updated weights for policy 0, policy_version 1890 (0.0007) -[2024-07-05 00:39:14,011][49214] Updated weights for policy 0, policy_version 1900 (0.0007) -[2024-07-05 00:39:15,422][49214] Updated weights for policy 0, policy_version 1910 (0.0007) -[2024-07-05 00:39:16,836][49214] Updated weights for policy 0, policy_version 1920 (0.0007) -[2024-07-05 00:39:18,243][49214] Updated weights for policy 0, policy_version 1930 (0.0006) -[2024-07-05 00:39:19,659][49214] Updated weights for policy 0, policy_version 1940 (0.0007) -[2024-07-05 00:39:21,071][49214] Updated weights for policy 0, policy_version 1950 (0.0007) -[2024-07-05 00:39:22,485][49214] Updated weights for policy 0, policy_version 1960 (0.0007) -[2024-07-05 00:39:23,899][49214] Updated weights for policy 0, policy_version 1970 (0.0006) -[2024-07-05 00:39:25,320][49214] Updated weights for policy 0, policy_version 1980 (0.0007) -[2024-07-05 00:39:26,814][49214] Updated weights for policy 0, policy_version 1990 (0.0006) -[2024-07-05 00:39:28,231][49214] Updated weights for policy 0, policy_version 2000 (0.0007) -[2024-07-05 00:39:29,650][49214] Updated weights for policy 0, policy_version 2010 (0.0006) -[2024-07-05 00:39:31,078][49214] Updated weights for policy 0, policy_version 2020 (0.0006) -[2024-07-05 00:39:32,491][49214] Updated weights for policy 0, policy_version 2030 (0.0007) -[2024-07-05 00:39:33,907][49214] Updated weights for policy 0, policy_version 2040 (0.0007) -[2024-07-05 00:39:35,321][49214] Updated weights for policy 0, policy_version 2050 (0.0006) -[2024-07-05 00:39:36,763][49214] Updated weights for policy 0, policy_version 2060 (0.0007) -[2024-07-05 00:39:38,168][49214] Updated weights for policy 0, policy_version 2070 (0.0007) -[2024-07-05 00:39:39,593][49214] Updated weights for policy 0, policy_version 2080 (0.0007) -[2024-07-05 00:39:41,017][49214] Updated weights for policy 0, policy_version 2090 (0.0007) -[2024-07-05 00:39:42,429][49214] Updated weights for policy 0, policy_version 2100 (0.0006) -[2024-07-05 00:39:43,859][49214] Updated weights for policy 0, policy_version 2110 (0.0007) -[2024-07-05 00:39:45,278][49214] Updated weights for policy 0, policy_version 2120 (0.0006) -[2024-07-05 00:39:46,697][49214] Updated weights for policy 0, policy_version 2130 (0.0006) -[2024-07-05 00:39:48,123][49214] Updated weights for policy 0, policy_version 2140 (0.0006) -[2024-07-05 00:39:49,548][49214] Updated weights for policy 0, policy_version 2150 (0.0007) -[2024-07-05 00:39:50,967][49214] Updated weights for policy 0, policy_version 2160 (0.0007) -[2024-07-05 00:39:52,392][49214] Updated weights for policy 0, policy_version 2170 (0.0007) -[2024-07-05 00:39:53,813][49214] Updated weights for policy 0, policy_version 2180 (0.0007) -[2024-07-05 00:39:54,804][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000002187_8957952.pth... -[2024-07-05 00:39:54,859][49201] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000000588_2408448.pth -[2024-07-05 00:39:55,239][49214] Updated weights for policy 0, policy_version 2190 (0.0006) -[2024-07-05 00:39:56,656][49214] Updated weights for policy 0, policy_version 2200 (0.0007) -[2024-07-05 00:39:58,083][49214] Updated weights for policy 0, policy_version 2210 (0.0007) -[2024-07-05 00:39:59,522][49214] Updated weights for policy 0, policy_version 2220 (0.0007) -[2024-07-05 00:40:00,926][49214] Updated weights for policy 0, policy_version 2230 (0.0007) -[2024-07-05 00:40:02,322][49214] Updated weights for policy 0, policy_version 2240 (0.0007) -[2024-07-05 00:40:03,737][49214] Updated weights for policy 0, policy_version 2250 (0.0007) -[2024-07-05 00:40:05,122][49214] Updated weights for policy 0, policy_version 2260 (0.0007) -[2024-07-05 00:40:06,518][49214] Updated weights for policy 0, policy_version 2270 (0.0006) -[2024-07-05 00:40:07,924][49214] Updated weights for policy 0, policy_version 2280 (0.0007) -[2024-07-05 00:40:09,319][49214] Updated weights for policy 0, policy_version 2290 (0.0007) -[2024-07-05 00:40:10,721][49214] Updated weights for policy 0, policy_version 2300 (0.0007) -[2024-07-05 00:40:12,122][49214] Updated weights for policy 0, policy_version 2310 (0.0006) -[2024-07-05 00:40:13,519][49214] Updated weights for policy 0, policy_version 2320 (0.0007) -[2024-07-05 00:40:14,938][49214] Updated weights for policy 0, policy_version 2330 (0.0006) -[2024-07-05 00:40:16,324][49214] Updated weights for policy 0, policy_version 2340 (0.0007) -[2024-07-05 00:40:17,719][49214] Updated weights for policy 0, policy_version 2350 (0.0007) -[2024-07-05 00:40:19,108][49214] Updated weights for policy 0, policy_version 2360 (0.0006) -[2024-07-05 00:40:20,508][49214] Updated weights for policy 0, policy_version 2370 (0.0007) -[2024-07-05 00:40:21,909][49214] Updated weights for policy 0, policy_version 2380 (0.0007) -[2024-07-05 00:40:23,319][49214] Updated weights for policy 0, policy_version 2390 (0.0007) -[2024-07-05 00:40:24,724][49214] Updated weights for policy 0, policy_version 2400 (0.0007) -[2024-07-05 00:40:26,121][49214] Updated weights for policy 0, policy_version 2410 (0.0007) -[2024-07-05 00:40:27,527][49214] Updated weights for policy 0, policy_version 2420 (0.0007) -[2024-07-05 00:40:28,920][49214] Updated weights for policy 0, policy_version 2430 (0.0006) -[2024-07-05 00:40:30,329][49214] Updated weights for policy 0, policy_version 2440 (0.0007) -[2024-07-05 00:40:31,721][49214] Updated weights for policy 0, policy_version 2450 (0.0007) -[2024-07-05 00:40:33,133][49214] Updated weights for policy 0, policy_version 2460 (0.0007) -[2024-07-05 00:40:34,540][49214] Updated weights for policy 0, policy_version 2470 (0.0006) -[2024-07-05 00:40:35,948][49214] Updated weights for policy 0, policy_version 2480 (0.0007) -[2024-07-05 00:40:37,362][49214] Updated weights for policy 0, policy_version 2490 (0.0006) -[2024-07-05 00:40:38,762][49214] Updated weights for policy 0, policy_version 2500 (0.0007) -[2024-07-05 00:40:40,162][49214] Updated weights for policy 0, policy_version 2510 (0.0006) -[2024-07-05 00:40:41,571][49214] Updated weights for policy 0, policy_version 2520 (0.0007) -[2024-07-05 00:40:42,981][49214] Updated weights for policy 0, policy_version 2530 (0.0007) -[2024-07-05 00:40:44,388][49214] Updated weights for policy 0, policy_version 2540 (0.0007) -[2024-07-05 00:40:45,795][49214] Updated weights for policy 0, policy_version 2550 (0.0007) -[2024-07-05 00:40:47,202][49214] Updated weights for policy 0, policy_version 2560 (0.0007) -[2024-07-05 00:40:48,597][49214] Updated weights for policy 0, policy_version 2570 (0.0007) -[2024-07-05 00:40:50,006][49214] Updated weights for policy 0, policy_version 2580 (0.0006) -[2024-07-05 00:40:51,402][49214] Updated weights for policy 0, policy_version 2590 (0.0007) -[2024-07-05 00:40:52,929][49214] Updated weights for policy 0, policy_version 2600 (0.0007) -[2024-07-05 00:40:54,477][49214] Updated weights for policy 0, policy_version 2610 (0.0007) -[2024-07-05 00:40:55,993][49214] Updated weights for policy 0, policy_version 2620 (0.0007) -[2024-07-05 00:40:57,556][49214] Updated weights for policy 0, policy_version 2630 (0.0007) -[2024-07-05 00:40:59,123][49214] Updated weights for policy 0, policy_version 2640 (0.0007) -[2024-07-05 00:41:00,566][49214] Updated weights for policy 0, policy_version 2650 (0.0007) -[2024-07-05 00:41:01,985][49214] Updated weights for policy 0, policy_version 2660 (0.0006) -[2024-07-05 00:41:03,475][49214] Updated weights for policy 0, policy_version 2670 (0.0007) -[2024-07-05 00:41:05,041][49214] Updated weights for policy 0, policy_version 2680 (0.0007) -[2024-07-05 00:41:06,497][49214] Updated weights for policy 0, policy_version 2690 (0.0007) -[2024-07-05 00:41:07,965][49214] Updated weights for policy 0, policy_version 2700 (0.0007) -[2024-07-05 00:41:09,473][49214] Updated weights for policy 0, policy_version 2710 (0.0007) -[2024-07-05 00:41:10,949][49214] Updated weights for policy 0, policy_version 2720 (0.0007) -[2024-07-05 00:41:12,398][49214] Updated weights for policy 0, policy_version 2730 (0.0007) -[2024-07-05 00:41:13,844][49214] Updated weights for policy 0, policy_version 2740 (0.0006) -[2024-07-05 00:41:15,327][49214] Updated weights for policy 0, policy_version 2750 (0.0007) -[2024-07-05 00:41:16,820][49214] Updated weights for policy 0, policy_version 2760 (0.0007) -[2024-07-05 00:41:18,258][49214] Updated weights for policy 0, policy_version 2770 (0.0007) -[2024-07-05 00:41:19,718][49214] Updated weights for policy 0, policy_version 2780 (0.0007) -[2024-07-05 00:41:21,211][49214] Updated weights for policy 0, policy_version 2790 (0.0007) -[2024-07-05 00:41:22,664][49214] Updated weights for policy 0, policy_version 2800 (0.0007) -[2024-07-05 00:41:24,106][49214] Updated weights for policy 0, policy_version 2810 (0.0006) -[2024-07-05 00:41:25,562][49214] Updated weights for policy 0, policy_version 2820 (0.0007) -[2024-07-05 00:41:27,019][49214] Updated weights for policy 0, policy_version 2830 (0.0007) -[2024-07-05 00:41:28,461][49214] Updated weights for policy 0, policy_version 2840 (0.0006) -[2024-07-05 00:41:29,940][49214] Updated weights for policy 0, policy_version 2850 (0.0007) -[2024-07-05 00:41:31,341][49214] Updated weights for policy 0, policy_version 2860 (0.0006) -[2024-07-05 00:41:32,720][49214] Updated weights for policy 0, policy_version 2870 (0.0006) -[2024-07-05 00:41:34,111][49214] Updated weights for policy 0, policy_version 2880 (0.0007) -[2024-07-05 00:41:35,514][49214] Updated weights for policy 0, policy_version 2890 (0.0007) -[2024-07-05 00:41:36,895][49214] Updated weights for policy 0, policy_version 2900 (0.0006) -[2024-07-05 00:41:38,287][49214] Updated weights for policy 0, policy_version 2910 (0.0007) -[2024-07-05 00:41:39,657][49214] Updated weights for policy 0, policy_version 2920 (0.0007) -[2024-07-05 00:41:39,801][49201] Saving new best policy, reward=28.407! -[2024-07-05 00:41:41,055][49214] Updated weights for policy 0, policy_version 2930 (0.0007) -[2024-07-05 00:41:42,431][49214] Updated weights for policy 0, policy_version 2940 (0.0007) -[2024-07-05 00:41:43,814][49214] Updated weights for policy 0, policy_version 2950 (0.0007) -[2024-07-05 00:41:45,218][49214] Updated weights for policy 0, policy_version 2960 (0.0007) -[2024-07-05 00:41:46,603][49214] Updated weights for policy 0, policy_version 2970 (0.0006) -[2024-07-05 00:41:47,990][49214] Updated weights for policy 0, policy_version 2980 (0.0006) -[2024-07-05 00:41:49,371][49214] Updated weights for policy 0, policy_version 2990 (0.0007) -[2024-07-05 00:41:50,737][49214] Updated weights for policy 0, policy_version 3000 (0.0007) -[2024-07-05 00:41:52,144][49214] Updated weights for policy 0, policy_version 3010 (0.0006) -[2024-07-05 00:41:53,568][49214] Updated weights for policy 0, policy_version 3020 (0.0007) -[2024-07-05 00:41:54,804][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000003028_12402688.pth... -[2024-07-05 00:41:54,860][49201] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000001347_5517312.pth -[2024-07-05 00:41:55,019][49214] Updated weights for policy 0, policy_version 3030 (0.0007) -[2024-07-05 00:41:56,453][49214] Updated weights for policy 0, policy_version 3040 (0.0006) -[2024-07-05 00:41:57,862][49214] Updated weights for policy 0, policy_version 3050 (0.0007) -[2024-07-05 00:41:59,343][49214] Updated weights for policy 0, policy_version 3060 (0.0007) -[2024-07-05 00:42:00,833][49214] Updated weights for policy 0, policy_version 3070 (0.0007) -[2024-07-05 00:42:02,263][49214] Updated weights for policy 0, policy_version 3080 (0.0007) -[2024-07-05 00:42:03,668][49214] Updated weights for policy 0, policy_version 3090 (0.0007) -[2024-07-05 00:42:05,079][49214] Updated weights for policy 0, policy_version 3100 (0.0007) -[2024-07-05 00:42:06,469][49214] Updated weights for policy 0, policy_version 3110 (0.0007) -[2024-07-05 00:42:07,864][49214] Updated weights for policy 0, policy_version 3120 (0.0007) -[2024-07-05 00:42:09,257][49214] Updated weights for policy 0, policy_version 3130 (0.0006) -[2024-07-05 00:42:10,638][49214] Updated weights for policy 0, policy_version 3140 (0.0006) -[2024-07-05 00:42:12,051][49214] Updated weights for policy 0, policy_version 3150 (0.0006) -[2024-07-05 00:42:13,446][49214] Updated weights for policy 0, policy_version 3160 (0.0007) -[2024-07-05 00:42:14,803][49201] Saving new best policy, reward=29.022! -[2024-07-05 00:42:14,898][49214] Updated weights for policy 0, policy_version 3170 (0.0007) -[2024-07-05 00:42:16,250][49214] Updated weights for policy 0, policy_version 3180 (0.0007) -[2024-07-05 00:42:17,648][49214] Updated weights for policy 0, policy_version 3190 (0.0006) -[2024-07-05 00:42:19,049][49214] Updated weights for policy 0, policy_version 3200 (0.0006) -[2024-07-05 00:42:20,460][49214] Updated weights for policy 0, policy_version 3210 (0.0006) -[2024-07-05 00:42:21,863][49214] Updated weights for policy 0, policy_version 3220 (0.0007) -[2024-07-05 00:42:23,265][49214] Updated weights for policy 0, policy_version 3230 (0.0007) -[2024-07-05 00:42:24,674][49214] Updated weights for policy 0, policy_version 3240 (0.0007) -[2024-07-05 00:42:26,081][49214] Updated weights for policy 0, policy_version 3250 (0.0007) -[2024-07-05 00:42:27,499][49214] Updated weights for policy 0, policy_version 3260 (0.0007) -[2024-07-05 00:42:28,897][49214] Updated weights for policy 0, policy_version 3270 (0.0007) -[2024-07-05 00:42:30,296][49214] Updated weights for policy 0, policy_version 3280 (0.0007) -[2024-07-05 00:42:31,680][49214] Updated weights for policy 0, policy_version 3290 (0.0006) -[2024-07-05 00:42:33,085][49214] Updated weights for policy 0, policy_version 3300 (0.0006) -[2024-07-05 00:42:34,474][49214] Updated weights for policy 0, policy_version 3310 (0.0006) -[2024-07-05 00:42:35,885][49214] Updated weights for policy 0, policy_version 3320 (0.0007) -[2024-07-05 00:42:37,277][49214] Updated weights for policy 0, policy_version 3330 (0.0007) -[2024-07-05 00:42:38,689][49214] Updated weights for policy 0, policy_version 3340 (0.0007) -[2024-07-05 00:42:40,082][49214] Updated weights for policy 0, policy_version 3350 (0.0007) -[2024-07-05 00:42:41,491][49214] Updated weights for policy 0, policy_version 3360 (0.0006) -[2024-07-05 00:42:42,913][49214] Updated weights for policy 0, policy_version 3370 (0.0007) -[2024-07-05 00:42:44,333][49214] Updated weights for policy 0, policy_version 3380 (0.0007) -[2024-07-05 00:42:45,717][49214] Updated weights for policy 0, policy_version 3390 (0.0006) -[2024-07-05 00:42:47,122][49214] Updated weights for policy 0, policy_version 3400 (0.0006) -[2024-07-05 00:42:48,532][49214] Updated weights for policy 0, policy_version 3410 (0.0006) -[2024-07-05 00:42:49,931][49214] Updated weights for policy 0, policy_version 3420 (0.0006) -[2024-07-05 00:42:51,322][49214] Updated weights for policy 0, policy_version 3430 (0.0006) -[2024-07-05 00:42:52,708][49214] Updated weights for policy 0, policy_version 3440 (0.0006) -[2024-07-05 00:42:54,109][49214] Updated weights for policy 0, policy_version 3450 (0.0007) -[2024-07-05 00:42:55,501][49214] Updated weights for policy 0, policy_version 3460 (0.0006) -[2024-07-05 00:42:56,896][49214] Updated weights for policy 0, policy_version 3470 (0.0007) -[2024-07-05 00:42:58,301][49214] Updated weights for policy 0, policy_version 3480 (0.0007) -[2024-07-05 00:42:59,685][49214] Updated weights for policy 0, policy_version 3490 (0.0006) -[2024-07-05 00:43:01,098][49214] Updated weights for policy 0, policy_version 3500 (0.0007) -[2024-07-05 00:43:02,485][49214] Updated weights for policy 0, policy_version 3510 (0.0006) -[2024-07-05 00:43:03,867][49214] Updated weights for policy 0, policy_version 3520 (0.0007) -[2024-07-05 00:43:04,803][49201] Saving new best policy, reward=29.797! -[2024-07-05 00:43:05,293][49214] Updated weights for policy 0, policy_version 3530 (0.0007) -[2024-07-05 00:43:06,681][49214] Updated weights for policy 0, policy_version 3540 (0.0007) -[2024-07-05 00:43:08,082][49214] Updated weights for policy 0, policy_version 3550 (0.0007) -[2024-07-05 00:43:09,474][49214] Updated weights for policy 0, policy_version 3560 (0.0007) -[2024-07-05 00:43:09,801][49201] Saving new best policy, reward=30.006! -[2024-07-05 00:43:10,858][49214] Updated weights for policy 0, policy_version 3570 (0.0006) -[2024-07-05 00:43:12,268][49214] Updated weights for policy 0, policy_version 3580 (0.0007) -[2024-07-05 00:43:13,658][49214] Updated weights for policy 0, policy_version 3590 (0.0007) -[2024-07-05 00:43:15,044][49214] Updated weights for policy 0, policy_version 3600 (0.0007) -[2024-07-05 00:43:16,438][49214] Updated weights for policy 0, policy_version 3610 (0.0007) -[2024-07-05 00:43:17,837][49214] Updated weights for policy 0, policy_version 3620 (0.0007) -[2024-07-05 00:43:19,228][49214] Updated weights for policy 0, policy_version 3630 (0.0007) -[2024-07-05 00:43:20,621][49214] Updated weights for policy 0, policy_version 3640 (0.0006) -[2024-07-05 00:43:22,017][49214] Updated weights for policy 0, policy_version 3650 (0.0006) -[2024-07-05 00:43:23,405][49214] Updated weights for policy 0, policy_version 3660 (0.0006) -[2024-07-05 00:43:24,800][49214] Updated weights for policy 0, policy_version 3670 (0.0007) -[2024-07-05 00:43:26,198][49214] Updated weights for policy 0, policy_version 3680 (0.0006) -[2024-07-05 00:43:27,596][49214] Updated weights for policy 0, policy_version 3690 (0.0007) -[2024-07-05 00:43:28,982][49214] Updated weights for policy 0, policy_version 3700 (0.0007) -[2024-07-05 00:43:30,374][49214] Updated weights for policy 0, policy_version 3710 (0.0006) -[2024-07-05 00:43:31,770][49214] Updated weights for policy 0, policy_version 3720 (0.0007) -[2024-07-05 00:43:33,150][49214] Updated weights for policy 0, policy_version 3730 (0.0006) -[2024-07-05 00:43:34,571][49214] Updated weights for policy 0, policy_version 3740 (0.0007) -[2024-07-05 00:43:34,803][49201] Saving new best policy, reward=30.526! -[2024-07-05 00:43:36,008][49214] Updated weights for policy 0, policy_version 3750 (0.0006) -[2024-07-05 00:43:37,705][49214] Updated weights for policy 0, policy_version 3760 (0.0007) -[2024-07-05 00:43:39,255][49214] Updated weights for policy 0, policy_version 3770 (0.0007) -[2024-07-05 00:43:40,819][49214] Updated weights for policy 0, policy_version 3780 (0.0007) -[2024-07-05 00:43:42,292][49214] Updated weights for policy 0, policy_version 3790 (0.0007) -[2024-07-05 00:43:43,740][49214] Updated weights for policy 0, policy_version 3800 (0.0006) -[2024-07-05 00:43:45,203][49214] Updated weights for policy 0, policy_version 3810 (0.0007) -[2024-07-05 00:43:46,681][49214] Updated weights for policy 0, policy_version 3820 (0.0007) -[2024-07-05 00:43:48,139][49214] Updated weights for policy 0, policy_version 3830 (0.0007) -[2024-07-05 00:43:49,587][49214] Updated weights for policy 0, policy_version 3840 (0.0007) -[2024-07-05 00:43:51,039][49214] Updated weights for policy 0, policy_version 3850 (0.0006) -[2024-07-05 00:43:52,451][49214] Updated weights for policy 0, policy_version 3860 (0.0007) -[2024-07-05 00:43:53,865][49214] Updated weights for policy 0, policy_version 3870 (0.0007) -[2024-07-05 00:43:54,803][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000003876_15876096.pth... -[2024-07-05 00:43:54,861][49201] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000002187_8957952.pth -[2024-07-05 00:43:55,304][49214] Updated weights for policy 0, policy_version 3880 (0.0007) -[2024-07-05 00:43:56,726][49214] Updated weights for policy 0, policy_version 3890 (0.0007) -[2024-07-05 00:43:58,163][49214] Updated weights for policy 0, policy_version 3900 (0.0007) -[2024-07-05 00:43:59,559][49214] Updated weights for policy 0, policy_version 3910 (0.0007) -[2024-07-05 00:43:59,801][49201] Saving new best policy, reward=31.449! -[2024-07-05 00:44:00,949][49214] Updated weights for policy 0, policy_version 3920 (0.0006) -[2024-07-05 00:44:02,322][49214] Updated weights for policy 0, policy_version 3930 (0.0007) -[2024-07-05 00:44:03,723][49214] Updated weights for policy 0, policy_version 3940 (0.0007) -[2024-07-05 00:44:04,803][49201] Saving new best policy, reward=34.074! -[2024-07-05 00:44:05,155][49214] Updated weights for policy 0, policy_version 3950 (0.0007) -[2024-07-05 00:44:06,554][49214] Updated weights for policy 0, policy_version 3960 (0.0006) -[2024-07-05 00:44:07,970][49214] Updated weights for policy 0, policy_version 3970 (0.0007) -[2024-07-05 00:44:09,397][49214] Updated weights for policy 0, policy_version 3980 (0.0007) -[2024-07-05 00:44:10,811][49214] Updated weights for policy 0, policy_version 3990 (0.0007) -[2024-07-05 00:44:12,238][49214] Updated weights for policy 0, policy_version 4000 (0.0006) -[2024-07-05 00:44:13,636][49214] Updated weights for policy 0, policy_version 4010 (0.0007) -[2024-07-05 00:44:15,058][49214] Updated weights for policy 0, policy_version 4020 (0.0006) -[2024-07-05 00:44:16,470][49214] Updated weights for policy 0, policy_version 4030 (0.0006) -[2024-07-05 00:44:17,901][49214] Updated weights for policy 0, policy_version 4040 (0.0006) -[2024-07-05 00:44:19,306][49214] Updated weights for policy 0, policy_version 4050 (0.0007) -[2024-07-05 00:44:20,704][49214] Updated weights for policy 0, policy_version 4060 (0.0006) -[2024-07-05 00:44:22,117][49214] Updated weights for policy 0, policy_version 4070 (0.0007) -[2024-07-05 00:44:23,531][49214] Updated weights for policy 0, policy_version 4080 (0.0006) -[2024-07-05 00:44:24,948][49214] Updated weights for policy 0, policy_version 4090 (0.0007) -[2024-07-05 00:44:26,350][49214] Updated weights for policy 0, policy_version 4100 (0.0007) -[2024-07-05 00:44:27,753][49214] Updated weights for policy 0, policy_version 4110 (0.0007) -[2024-07-05 00:44:29,160][49214] Updated weights for policy 0, policy_version 4120 (0.0006) -[2024-07-05 00:44:30,568][49214] Updated weights for policy 0, policy_version 4130 (0.0006) -[2024-07-05 00:44:31,969][49214] Updated weights for policy 0, policy_version 4140 (0.0006) -[2024-07-05 00:44:33,392][49214] Updated weights for policy 0, policy_version 4150 (0.0007) -[2024-07-05 00:44:34,824][49214] Updated weights for policy 0, policy_version 4160 (0.0007) -[2024-07-05 00:44:36,259][49214] Updated weights for policy 0, policy_version 4170 (0.0007) -[2024-07-05 00:44:37,689][49214] Updated weights for policy 0, policy_version 4180 (0.0007) -[2024-07-05 00:44:39,125][49214] Updated weights for policy 0, policy_version 4190 (0.0007) -[2024-07-05 00:44:40,530][49214] Updated weights for policy 0, policy_version 4200 (0.0006) -[2024-07-05 00:44:41,948][49214] Updated weights for policy 0, policy_version 4210 (0.0006) -[2024-07-05 00:44:43,370][49214] Updated weights for policy 0, policy_version 4220 (0.0007) -[2024-07-05 00:44:44,764][49214] Updated weights for policy 0, policy_version 4230 (0.0007) -[2024-07-05 00:44:46,185][49214] Updated weights for policy 0, policy_version 4240 (0.0007) -[2024-07-05 00:44:47,611][49214] Updated weights for policy 0, policy_version 4250 (0.0008) -[2024-07-05 00:44:49,029][49214] Updated weights for policy 0, policy_version 4260 (0.0007) -[2024-07-05 00:44:50,453][49214] Updated weights for policy 0, policy_version 4270 (0.0007) -[2024-07-05 00:44:51,870][49214] Updated weights for policy 0, policy_version 4280 (0.0007) -[2024-07-05 00:44:53,243][49214] Updated weights for policy 0, policy_version 4290 (0.0007) -[2024-07-05 00:44:54,650][49214] Updated weights for policy 0, policy_version 4300 (0.0006) -[2024-07-05 00:44:56,047][49214] Updated weights for policy 0, policy_version 4310 (0.0006) -[2024-07-05 00:44:57,447][49214] Updated weights for policy 0, policy_version 4320 (0.0007) -[2024-07-05 00:44:58,847][49214] Updated weights for policy 0, policy_version 4330 (0.0007) -[2024-07-05 00:45:00,245][49214] Updated weights for policy 0, policy_version 4340 (0.0007) -[2024-07-05 00:45:01,644][49214] Updated weights for policy 0, policy_version 4350 (0.0007) -[2024-07-05 00:45:02,994][49214] Updated weights for policy 0, policy_version 4360 (0.0007) -[2024-07-05 00:45:04,405][49214] Updated weights for policy 0, policy_version 4370 (0.0007) -[2024-07-05 00:45:05,792][49214] Updated weights for policy 0, policy_version 4380 (0.0007) -[2024-07-05 00:45:07,217][49214] Updated weights for policy 0, policy_version 4390 (0.0006) -[2024-07-05 00:45:08,619][49214] Updated weights for policy 0, policy_version 4400 (0.0007) -[2024-07-05 00:45:10,016][49214] Updated weights for policy 0, policy_version 4410 (0.0007) -[2024-07-05 00:45:11,400][49214] Updated weights for policy 0, policy_version 4420 (0.0007) -[2024-07-05 00:45:12,803][49214] Updated weights for policy 0, policy_version 4430 (0.0007) -[2024-07-05 00:45:14,224][49214] Updated weights for policy 0, policy_version 4440 (0.0007) -[2024-07-05 00:45:15,623][49214] Updated weights for policy 0, policy_version 4450 (0.0007) -[2024-07-05 00:45:17,047][49214] Updated weights for policy 0, policy_version 4460 (0.0007) -[2024-07-05 00:45:18,466][49214] Updated weights for policy 0, policy_version 4470 (0.0007) -[2024-07-05 00:45:19,902][49214] Updated weights for policy 0, policy_version 4480 (0.0006) -[2024-07-05 00:45:21,325][49214] Updated weights for policy 0, policy_version 4490 (0.0006) -[2024-07-05 00:45:22,726][49214] Updated weights for policy 0, policy_version 4500 (0.0007) -[2024-07-05 00:45:24,150][49214] Updated weights for policy 0, policy_version 4510 (0.0007) -[2024-07-05 00:45:25,585][49214] Updated weights for policy 0, policy_version 4520 (0.0006) -[2024-07-05 00:45:27,014][49214] Updated weights for policy 0, policy_version 4530 (0.0006) -[2024-07-05 00:45:28,425][49214] Updated weights for policy 0, policy_version 4540 (0.0007) -[2024-07-05 00:45:29,855][49214] Updated weights for policy 0, policy_version 4550 (0.0006) -[2024-07-05 00:45:31,261][49214] Updated weights for policy 0, policy_version 4560 (0.0006) -[2024-07-05 00:45:32,690][49214] Updated weights for policy 0, policy_version 4570 (0.0007) -[2024-07-05 00:45:34,100][49214] Updated weights for policy 0, policy_version 4580 (0.0007) -[2024-07-05 00:45:35,504][49214] Updated weights for policy 0, policy_version 4590 (0.0007) -[2024-07-05 00:45:36,926][49214] Updated weights for policy 0, policy_version 4600 (0.0007) -[2024-07-05 00:45:38,342][49214] Updated weights for policy 0, policy_version 4610 (0.0007) -[2024-07-05 00:45:39,758][49214] Updated weights for policy 0, policy_version 4620 (0.0007) -[2024-07-05 00:45:41,169][49214] Updated weights for policy 0, policy_version 4630 (0.0007) -[2024-07-05 00:45:42,597][49214] Updated weights for policy 0, policy_version 4640 (0.0007) -[2024-07-05 00:45:44,006][49214] Updated weights for policy 0, policy_version 4650 (0.0006) -[2024-07-05 00:45:45,407][49214] Updated weights for policy 0, policy_version 4660 (0.0006) -[2024-07-05 00:45:46,827][49214] Updated weights for policy 0, policy_version 4670 (0.0006) -[2024-07-05 00:45:48,233][49214] Updated weights for policy 0, policy_version 4680 (0.0007) -[2024-07-05 00:45:49,633][49214] Updated weights for policy 0, policy_version 4690 (0.0006) -[2024-07-05 00:45:51,040][49214] Updated weights for policy 0, policy_version 4700 (0.0006) -[2024-07-05 00:45:52,451][49214] Updated weights for policy 0, policy_version 4710 (0.0007) -[2024-07-05 00:45:53,872][49214] Updated weights for policy 0, policy_version 4720 (0.0006) -[2024-07-05 00:45:54,803][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004726_19357696.pth... -[2024-07-05 00:45:54,860][49201] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000003028_12402688.pth -[2024-07-05 00:45:55,291][49214] Updated weights for policy 0, policy_version 4730 (0.0006) -[2024-07-05 00:45:56,698][49214] Updated weights for policy 0, policy_version 4740 (0.0007) -[2024-07-05 00:45:58,110][49214] Updated weights for policy 0, policy_version 4750 (0.0006) -[2024-07-05 00:45:59,520][49214] Updated weights for policy 0, policy_version 4760 (0.0006) -[2024-07-05 00:46:00,937][49214] Updated weights for policy 0, policy_version 4770 (0.0007) -[2024-07-05 00:46:02,349][49214] Updated weights for policy 0, policy_version 4780 (0.0007) -[2024-07-05 00:46:03,761][49214] Updated weights for policy 0, policy_version 4790 (0.0007) -[2024-07-05 00:46:05,171][49214] Updated weights for policy 0, policy_version 4800 (0.0007) -[2024-07-05 00:46:06,577][49214] Updated weights for policy 0, policy_version 4810 (0.0007) -[2024-07-05 00:46:08,003][49214] Updated weights for policy 0, policy_version 4820 (0.0007) -[2024-07-05 00:46:09,423][49214] Updated weights for policy 0, policy_version 4830 (0.0006) -[2024-07-05 00:46:10,830][49214] Updated weights for policy 0, policy_version 4840 (0.0007) -[2024-07-05 00:46:12,260][49214] Updated weights for policy 0, policy_version 4850 (0.0007) -[2024-07-05 00:46:13,679][49214] Updated weights for policy 0, policy_version 4860 (0.0007) -[2024-07-05 00:46:14,807][49201] Saving new best policy, reward=34.624! -[2024-07-05 00:46:15,098][49214] Updated weights for policy 0, policy_version 4870 (0.0007) -[2024-07-05 00:46:16,519][49214] Updated weights for policy 0, policy_version 4880 (0.0006) -[2024-07-05 00:46:17,083][49201] Stopping Batcher_0... -[2024-07-05 00:46:17,084][49201] Loop batcher_evt_loop terminating... -[2024-07-05 00:46:17,084][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth... -[2024-07-05 00:46:17,094][49237] Stopping RolloutWorker_w7... -[2024-07-05 00:46:17,095][49232] Stopping RolloutWorker_w2... -[2024-07-05 00:46:17,095][49236] Stopping RolloutWorker_w6... -[2024-07-05 00:46:17,095][49237] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 00:46:17,095][49234] Stopping RolloutWorker_w5... -[2024-07-05 00:46:17,095][49232] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 00:46:17,095][49231] Stopping RolloutWorker_w1... -[2024-07-05 00:46:17,095][49236] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 00:46:17,095][49235] Stopping RolloutWorker_w4... -[2024-07-05 00:46:17,095][49234] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 00:46:17,095][49231] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 00:46:17,095][49235] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 00:46:17,096][49233] Stopping RolloutWorker_w3... -[2024-07-05 00:46:17,096][49215] Stopping RolloutWorker_w0... -[2024-07-05 00:46:17,096][49233] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 00:46:17,096][49215] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 00:46:17,114][49214] Weights refcount: 2 0 -[2024-07-05 00:46:17,116][49214] Stopping InferenceWorker_p0-w0... -[2024-07-05 00:46:17,116][49214] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 00:46:17,148][49201] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000003876_15876096.pth -[2024-07-05 00:46:17,158][49201] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth... -[2024-07-05 00:46:17,249][49201] Stopping LearnerWorker_p0... -[2024-07-05 00:46:17,250][49201] Loop learner_proc0_evt_loop terminating... -[2024-07-05 11:08:07,528][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 11:08:07,537][25826] Rollout worker 0 uses device cpu -[2024-07-05 11:08:07,538][25826] Rollout worker 1 uses device cpu -[2024-07-05 11:08:07,538][25826] Rollout worker 2 uses device cpu -[2024-07-05 11:08:07,539][25826] Rollout worker 3 uses device cpu -[2024-07-05 11:08:07,539][25826] Rollout worker 4 uses device cpu -[2024-07-05 11:08:07,539][25826] Rollout worker 5 uses device cpu -[2024-07-05 11:08:07,540][25826] Rollout worker 6 uses device cpu -[2024-07-05 11:08:07,540][25826] Rollout worker 7 uses device cpu -[2024-07-05 11:08:07,540][25826] Rollout worker 8 uses device cpu -[2024-07-05 11:08:07,541][25826] Rollout worker 9 uses device cpu -[2024-07-05 11:08:07,541][25826] Rollout worker 10 uses device cpu -[2024-07-05 11:08:07,541][25826] Rollout worker 11 uses device cpu -[2024-07-05 11:08:07,542][25826] Rollout worker 12 uses device cpu -[2024-07-05 11:08:07,542][25826] Rollout worker 13 uses device cpu -[2024-07-05 11:08:07,542][25826] Rollout worker 14 uses device cpu -[2024-07-05 11:08:07,543][25826] Rollout worker 15 uses device cpu -[2024-07-05 11:08:07,635][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:07,636][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 11:08:07,734][25826] Starting all processes... -[2024-07-05 11:08:07,734][25826] Starting process learner_proc0 -[2024-07-05 11:08:08,284][25826] Starting all processes... -[2024-07-05 11:08:08,290][25826] Starting process inference_proc0-0 -[2024-07-05 11:08:08,291][25826] Starting process rollout_proc0 -[2024-07-05 11:08:08,291][25826] Starting process rollout_proc1 -[2024-07-05 11:08:08,291][25826] Starting process rollout_proc2 -[2024-07-05 11:08:08,292][25826] Starting process rollout_proc3 -[2024-07-05 11:08:08,292][25826] Starting process rollout_proc4 -[2024-07-05 11:08:08,294][25826] Starting process rollout_proc5 -[2024-07-05 11:08:08,295][25826] Starting process rollout_proc6 -[2024-07-05 11:08:08,298][25826] Starting process rollout_proc7 -[2024-07-05 11:08:08,298][25826] Starting process rollout_proc8 -[2024-07-05 11:08:08,298][25826] Starting process rollout_proc9 -[2024-07-05 11:08:08,299][25826] Starting process rollout_proc10 -[2024-07-05 11:08:08,300][25826] Starting process rollout_proc11 -[2024-07-05 11:08:08,301][25826] Starting process rollout_proc12 -[2024-07-05 11:08:08,301][25826] Starting process rollout_proc13 -[2024-07-05 11:08:08,302][25826] Starting process rollout_proc14 -[2024-07-05 11:08:08,318][25826] Starting process rollout_proc15 -[2024-07-05 11:08:12,170][26047] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:12,170][26047] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 11:08:12,294][26047] Num visible devices: 1 -[2024-07-05 11:08:12,348][26074] Worker 6 uses CPU cores [6] -[2024-07-05 11:08:12,360][26095] Worker 13 uses CPU cores [13] -[2024-07-05 11:08:12,361][26047] Setting fixed seed 200 -[2024-07-05 11:08:12,362][26047] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:12,362][26047] Initializing actor-critic model on device cuda:0 -[2024-07-05 11:08:12,363][26047] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:08:12,363][26047] RunningMeanStd input shape: (1,) -[2024-07-05 11:08:12,375][26047] ConvEncoder: input_channels=3 -[2024-07-05 11:08:12,426][26068] Worker 0 uses CPU cores [0] -[2024-07-05 11:08:12,436][26070] Worker 2 uses CPU cores [2] -[2024-07-05 11:08:12,502][26067] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:12,502][26067] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 11:08:12,539][26079] Worker 9 uses CPU cores [9] -[2024-07-05 11:08:12,548][26047] Conv encoder output size: 512 -[2024-07-05 11:08:12,548][26047] Policy head output size: 512 -[2024-07-05 11:08:12,561][26047] Created Actor Critic model with architecture: -[2024-07-05 11:08:12,562][26047] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 11:08:12,568][26067] Num visible devices: 1 -[2024-07-05 11:08:12,589][26069] Worker 1 uses CPU cores [1] -[2024-07-05 11:08:12,616][26071] Worker 3 uses CPU cores [3] -[2024-07-05 11:08:12,648][26072] Worker 4 uses CPU cores [4] -[2024-07-05 11:08:12,660][26076] Worker 8 uses CPU cores [8] -[2024-07-05 11:08:12,675][26047] Using optimizer -[2024-07-05 11:08:12,688][26075] Worker 7 uses CPU cores [7] -[2024-07-05 11:08:12,788][26096] Worker 12 uses CPU cores [12] -[2024-07-05 11:08:12,798][26098] Worker 15 uses CPU cores [15] -[2024-07-05 11:08:12,827][26077] Worker 11 uses CPU cores [11] -[2024-07-05 11:08:12,837][26078] Worker 10 uses CPU cores [10] -[2024-07-05 11:08:12,861][26073] Worker 5 uses CPU cores [5] -[2024-07-05 11:08:12,993][26097] Worker 14 uses CPU cores [14] -[2024-07-05 11:08:13,220][26047] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth... -[2024-07-05 11:08:13,274][26047] Loading model from checkpoint -[2024-07-05 11:08:13,275][26047] Loaded experiment state at self.train_step=4884, self.env_steps=20004864 -[2024-07-05 11:08:13,275][26047] Initialized policy 0 weights for model version 4884 -[2024-07-05 11:08:13,277][26047] LearnerWorker_p0 finished initialization! -[2024-07-05 11:08:13,277][26047] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:13,340][26067] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:08:13,340][26067] RunningMeanStd input shape: (1,) -[2024-07-05 11:08:13,347][26067] ConvEncoder: input_channels=3 -[2024-07-05 11:08:13,401][26067] Conv encoder output size: 512 -[2024-07-05 11:08:13,402][26067] Policy head output size: 512 -[2024-07-05 11:08:13,436][25826] Inference worker 0-0 is ready! -[2024-07-05 11:08:13,437][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 11:08:13,482][26073] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,484][26068] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,485][26098] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,485][26095] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,486][26076] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,486][26079] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,488][26096] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,490][26077] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,492][26097] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,494][26075] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,498][26070] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,500][26071] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,504][26069] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,508][26072] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,508][26074] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:13,517][26078] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:14,086][26068] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,087][26073] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,087][26098] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,090][26071] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,090][26077] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,090][26076] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,093][26095] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,093][26096] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,244][26072] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,245][26076] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,292][26074] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,298][26077] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,300][26068] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,304][26070] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,312][26069] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,323][26078] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,354][26071] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,400][26072] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,430][26079] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,476][26070] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,497][26078] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,498][26096] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,499][26075] Decorrelating experience for 0 frames... -[2024-07-05 11:08:14,521][26068] Decorrelating experience for 64 frames... -[2024-07-05 11:08:14,591][26077] Decorrelating experience for 64 frames... -[2024-07-05 11:08:14,634][26079] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,651][26095] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,689][26070] Decorrelating experience for 64 frames... -[2024-07-05 11:08:14,713][26072] Decorrelating experience for 64 frames... -[2024-07-05 11:08:14,752][26074] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,800][26068] Decorrelating experience for 96 frames... -[2024-07-05 11:08:14,803][26079] Decorrelating experience for 64 frames... -[2024-07-05 11:08:14,818][26095] Decorrelating experience for 64 frames... -[2024-07-05 11:08:14,819][26077] Decorrelating experience for 96 frames... -[2024-07-05 11:08:14,862][26098] Decorrelating experience for 32 frames... -[2024-07-05 11:08:14,873][26070] Decorrelating experience for 96 frames... -[2024-07-05 11:08:14,898][26072] Decorrelating experience for 96 frames... -[2024-07-05 11:08:14,976][26074] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,022][26075] Decorrelating experience for 32 frames... -[2024-07-05 11:08:15,027][26096] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,038][26079] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,041][26098] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,043][26068] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,174][26070] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,190][26071] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,222][26077] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,244][26075] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,256][26073] Decorrelating experience for 32 frames... -[2024-07-05 11:08:15,275][26074] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,295][26072] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,395][26097] Decorrelating experience for 0 frames... -[2024-07-05 11:08:15,429][26077] Decorrelating experience for 160 frames... -[2024-07-05 11:08:15,430][26076] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,443][26071] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,456][26070] Decorrelating experience for 160 frames... -[2024-07-05 11:08:15,466][26098] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,467][26096] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,525][26095] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,545][26074] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,647][26078] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,652][26079] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,685][26075] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,686][26076] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,686][26097] Decorrelating experience for 32 frames... -[2024-07-05 11:08:15,758][26073] Decorrelating experience for 64 frames... -[2024-07-05 11:08:15,759][26070] Decorrelating experience for 192 frames... -[2024-07-05 11:08:15,791][26071] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,800][26096] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,849][26078] Decorrelating experience for 96 frames... -[2024-07-05 11:08:15,876][26072] Decorrelating experience for 160 frames... -[2024-07-05 11:08:15,926][26095] Decorrelating experience for 128 frames... -[2024-07-05 11:08:15,929][26079] Decorrelating experience for 160 frames... -[2024-07-05 11:08:15,945][26098] Decorrelating experience for 128 frames... -[2024-07-05 11:08:16,008][26077] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,046][26070] Decorrelating experience for 224 frames... -[2024-07-05 11:08:16,048][26096] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,113][26071] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,175][26075] Decorrelating experience for 128 frames... -[2024-07-05 11:08:16,194][26078] Decorrelating experience for 128 frames... -[2024-07-05 11:08:16,233][26074] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,238][26072] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,246][26095] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,306][26077] Decorrelating experience for 224 frames... -[2024-07-05 11:08:16,346][26097] Decorrelating experience for 64 frames... -[2024-07-05 11:08:16,424][26075] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,425][26076] Decorrelating experience for 128 frames... -[2024-07-05 11:08:16,446][26096] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,450][26071] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,470][26098] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,471][26078] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,580][26068] Decorrelating experience for 160 frames... -[2024-07-05 11:08:16,591][26097] Decorrelating experience for 96 frames... -[2024-07-05 11:08:16,653][26095] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,658][26079] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,686][26073] Decorrelating experience for 96 frames... -[2024-07-05 11:08:16,697][26072] Decorrelating experience for 224 frames... -[2024-07-05 11:08:16,698][26075] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,738][26074] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,798][26098] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,905][26096] Decorrelating experience for 224 frames... -[2024-07-05 11:08:16,934][26069] Decorrelating experience for 32 frames... -[2024-07-05 11:08:16,953][26068] Decorrelating experience for 192 frames... -[2024-07-05 11:08:16,954][26095] Decorrelating experience for 224 frames... -[2024-07-05 11:08:16,997][26074] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,001][26097] Decorrelating experience for 128 frames... -[2024-07-05 11:08:17,017][26071] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,018][26073] Decorrelating experience for 128 frames... -[2024-07-05 11:08:17,110][26098] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,162][26076] Decorrelating experience for 160 frames... -[2024-07-05 11:08:17,191][26069] Decorrelating experience for 64 frames... -[2024-07-05 11:08:17,218][26079] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,242][26078] Decorrelating experience for 192 frames... -[2024-07-05 11:08:17,253][26075] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,364][26068] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,393][26097] Decorrelating experience for 160 frames... -[2024-07-05 11:08:17,456][26069] Decorrelating experience for 96 frames... -[2024-07-05 11:08:17,497][26073] Decorrelating experience for 160 frames... -[2024-07-05 11:08:17,579][26078] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,682][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 20004864. Throughput: 0: nan. Samples: 40. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 11:08:17,683][25826] Avg episode reward: [(0, '1.508')] -[2024-07-05 11:08:17,693][26097] Decorrelating experience for 192 frames... -[2024-07-05 11:08:17,737][26076] Decorrelating experience for 192 frames... -[2024-07-05 11:08:17,865][26047] Signal inference workers to stop experience collection... -[2024-07-05 11:08:17,872][26067] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 11:08:17,985][26097] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,987][26076] Decorrelating experience for 224 frames... -[2024-07-05 11:08:17,997][26073] Decorrelating experience for 192 frames... -[2024-07-05 11:08:18,206][26069] Decorrelating experience for 128 frames... -[2024-07-05 11:08:18,213][26073] Decorrelating experience for 224 frames... -[2024-07-05 11:08:18,408][26069] Decorrelating experience for 160 frames... -[2024-07-05 11:08:18,621][26069] Decorrelating experience for 192 frames... -[2024-07-05 11:08:18,834][26069] Decorrelating experience for 224 frames... -[2024-07-05 11:08:19,128][26047] Signal inference workers to resume experience collection... -[2024-07-05 11:08:19,128][26067] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 11:08:19,129][26047] Stopping Batcher_0... -[2024-07-05 11:08:19,129][26047] Loop batcher_evt_loop terminating... -[2024-07-05 11:08:19,150][26068] Stopping RolloutWorker_w0... -[2024-07-05 11:08:19,150][26079] Stopping RolloutWorker_w9... -[2024-07-05 11:08:19,151][26077] Stopping RolloutWorker_w11... -[2024-07-05 11:08:19,151][26068] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 11:08:19,151][26095] Stopping RolloutWorker_w13... -[2024-07-05 11:08:19,151][26079] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 11:08:19,151][26077] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 11:08:19,151][26095] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 11:08:19,152][26074] Stopping RolloutWorker_w6... -[2024-07-05 11:08:19,152][26074] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 11:08:19,152][26073] Stopping RolloutWorker_w5... -[2024-07-05 11:08:19,153][26069] Stopping RolloutWorker_w1... -[2024-07-05 11:08:19,153][26069] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 11:08:19,153][26078] Stopping RolloutWorker_w10... -[2024-07-05 11:08:19,153][26071] Stopping RolloutWorker_w3... -[2024-07-05 11:08:19,153][26097] Stopping RolloutWorker_w14... -[2024-07-05 11:08:19,153][26073] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 11:08:19,153][26078] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 11:08:19,153][26072] Stopping RolloutWorker_w4... -[2024-07-05 11:08:19,153][26076] Stopping RolloutWorker_w8... -[2024-07-05 11:08:19,153][26071] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 11:08:19,154][26097] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 11:08:19,154][26076] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 11:08:19,154][26072] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 11:08:19,153][26070] Stopping RolloutWorker_w2... -[2024-07-05 11:08:19,154][26070] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 11:08:19,160][25826] Component Batcher_0 stopped! -[2024-07-05 11:08:19,162][25826] Component RolloutWorker_w0 stopped! -[2024-07-05 11:08:19,162][25826] Component RolloutWorker_w9 stopped! -[2024-07-05 11:08:19,163][25826] Component RolloutWorker_w11 stopped! -[2024-07-05 11:08:19,164][25826] Component RolloutWorker_w13 stopped! -[2024-07-05 11:08:19,165][25826] Component RolloutWorker_w6 stopped! -[2024-07-05 11:08:19,166][25826] Component RolloutWorker_w5 stopped! -[2024-07-05 11:08:19,167][25826] Component RolloutWorker_w1 stopped! -[2024-07-05 11:08:19,168][25826] Component RolloutWorker_w3 stopped! -[2024-07-05 11:08:19,169][25826] Component RolloutWorker_w10 stopped! -[2024-07-05 11:08:19,170][26098] Stopping RolloutWorker_w15... -[2024-07-05 11:08:19,170][25826] Component RolloutWorker_w14 stopped! -[2024-07-05 11:08:19,171][25826] Component RolloutWorker_w4 stopped! -[2024-07-05 11:08:19,172][25826] Component RolloutWorker_w8 stopped! -[2024-07-05 11:08:19,171][26098] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 11:08:19,173][25826] Component RolloutWorker_w2 stopped! -[2024-07-05 11:08:19,173][26067] Weights refcount: 2 0 -[2024-07-05 11:08:19,174][25826] Component RolloutWorker_w15 stopped! -[2024-07-05 11:08:19,177][26096] Stopping RolloutWorker_w12... -[2024-07-05 11:08:19,177][26096] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 11:08:19,177][25826] Component RolloutWorker_w12 stopped! -[2024-07-05 11:08:19,178][26067] Stopping InferenceWorker_p0-w0... -[2024-07-05 11:08:19,179][26067] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 11:08:19,178][25826] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 11:08:19,183][26075] Stopping RolloutWorker_w7... -[2024-07-05 11:08:19,184][26075] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 11:08:19,183][25826] Component RolloutWorker_w7 stopped! -[2024-07-05 11:08:19,639][26047] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004886_20021248.pth... -[2024-07-05 11:08:19,724][26047] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004726_19357696.pth -[2024-07-05 11:08:19,726][26047] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004886_20021248.pth... -[2024-07-05 11:08:19,830][26047] Stopping LearnerWorker_p0... -[2024-07-05 11:08:19,831][26047] Loop learner_proc0_evt_loop terminating... -[2024-07-05 11:08:19,831][25826] Component LearnerWorker_p0 stopped! -[2024-07-05 11:08:19,832][25826] Waiting for process learner_proc0 to stop... -[2024-07-05 11:08:20,936][25826] Waiting for process inference_proc0-0 to join... -[2024-07-05 11:08:20,937][25826] Waiting for process rollout_proc0 to join... -[2024-07-05 11:08:20,938][25826] Waiting for process rollout_proc1 to join... -[2024-07-05 11:08:20,938][25826] Waiting for process rollout_proc2 to join... -[2024-07-05 11:08:20,939][25826] Waiting for process rollout_proc3 to join... -[2024-07-05 11:08:20,939][25826] Waiting for process rollout_proc4 to join... -[2024-07-05 11:08:20,939][25826] Waiting for process rollout_proc5 to join... -[2024-07-05 11:08:20,939][25826] Waiting for process rollout_proc6 to join... -[2024-07-05 11:08:20,940][25826] Waiting for process rollout_proc7 to join... -[2024-07-05 11:08:20,940][25826] Waiting for process rollout_proc8 to join... -[2024-07-05 11:08:20,940][25826] Waiting for process rollout_proc9 to join... -[2024-07-05 11:08:20,941][25826] Waiting for process rollout_proc10 to join... -[2024-07-05 11:08:20,941][25826] Waiting for process rollout_proc11 to join... -[2024-07-05 11:08:20,941][25826] Waiting for process rollout_proc12 to join... -[2024-07-05 11:08:20,942][25826] Waiting for process rollout_proc13 to join... -[2024-07-05 11:08:20,942][25826] Waiting for process rollout_proc14 to join... -[2024-07-05 11:08:20,943][25826] Waiting for process rollout_proc15 to join... -[2024-07-05 11:08:20,944][25826] Batcher 0 profile tree view: -batching: 0.0284, releasing_batches: 0.0005 -[2024-07-05 11:08:20,944][25826] InferenceWorker_p0-w0 profile tree view: -update_model: 0.0100 -wait_policy: 0.0000 - wait_policy_total: 3.1260 -one_step: 0.0032 - handle_policy_step: 1.2155 - deserialize: 0.0590, stack: 0.0047, obs_to_device_normalize: 0.2043, forward: 0.8275, send_messages: 0.0327 - prepare_outputs: 0.0598 - to_cpu: 0.0224 -[2024-07-05 11:08:20,945][25826] Learner 0 profile tree view: -misc: 0.0000, prepare_batch: 0.9804 -train: 1.1628 - epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0002, kl_divergence: 0.0066, after_optimizer: 0.0290 - calculate_losses: 0.4615 - losses_init: 0.0000, forward_head: 0.2044, bptt_initial: 0.2169, tail: 0.0182, advantages_returns: 0.0007, losses: 0.0191 - bptt: 0.0018 - bptt_forward_core: 0.0017 - update: 0.6646 - clip: 0.0292 -[2024-07-05 11:08:20,945][25826] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.0003, enqueue_policy_requests: 0.0118, env_step: 0.1488, overhead: 0.0149, complete_rollouts: 0.0000 -save_policy_outputs: 0.0085 - split_output_tensors: 0.0039 -[2024-07-05 11:08:20,946][25826] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.0004, enqueue_policy_requests: 0.0282, env_step: 0.2969, overhead: 0.0322, complete_rollouts: 0.0003 -save_policy_outputs: 0.0203 - split_output_tensors: 0.0095 -[2024-07-05 11:08:20,946][25826] Loop Runner_EvtLoop terminating... -[2024-07-05 11:08:20,947][25826] Runner profile tree view: -main_loop: 13.2131 -[2024-07-05 11:08:20,947][25826] Collected {0: 20021248}, FPS: 1240.0 -[2024-07-05 11:08:41,198][25826] Environment doom_basic already registered, overwriting... -[2024-07-05 11:08:41,199][25826] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 11:08:41,199][25826] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 11:08:41,200][25826] Environment doom_dm already registered, overwriting... -[2024-07-05 11:08:41,200][25826] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 11:08:41,200][25826] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 11:08:41,201][25826] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 11:08:41,201][25826] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 11:08:41,201][25826] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 11:08:41,201][25826] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 11:08:41,202][25826] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 11:08:41,202][25826] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 11:08:41,202][25826] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 11:08:41,202][25826] Environment doom_battle already registered, overwriting... -[2024-07-05 11:08:41,203][25826] Environment doom_battle2 already registered, overwriting... -[2024-07-05 11:08:41,203][25826] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 11:08:41,203][25826] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 11:08:41,203][25826] Environment doom_duel already registered, overwriting... -[2024-07-05 11:08:41,204][25826] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 11:08:41,204][25826] Environment doom_benchmark already registered, overwriting... -[2024-07-05 11:08:41,204][25826] register_encoder_factory: -[2024-07-05 11:08:41,209][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:08:41,210][25826] Overriding arg 'train_for_env_steps' with value 30000000 passed from command line -[2024-07-05 11:08:41,214][25826] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 11:08:41,214][25826] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 11:08:41,214][25826] Weights and Biases integration disabled -[2024-07-05 11:08:41,216][25826] Environment var CUDA_VISIBLE_DEVICES is 0 - -[2024-07-05 11:08:43,804][25826] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir -restart_behavior=resume -device=gpu -seed=200 -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=30000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-07-05 11:08:43,805][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 11:08:43,806][25826] Rollout worker 0 uses device cpu -[2024-07-05 11:08:43,807][25826] Rollout worker 1 uses device cpu -[2024-07-05 11:08:43,807][25826] Rollout worker 2 uses device cpu -[2024-07-05 11:08:43,807][25826] Rollout worker 3 uses device cpu -[2024-07-05 11:08:43,807][25826] Rollout worker 4 uses device cpu -[2024-07-05 11:08:43,808][25826] Rollout worker 5 uses device cpu -[2024-07-05 11:08:43,808][25826] Rollout worker 6 uses device cpu -[2024-07-05 11:08:43,808][25826] Rollout worker 7 uses device cpu -[2024-07-05 11:08:43,808][25826] Rollout worker 8 uses device cpu -[2024-07-05 11:08:43,808][25826] Rollout worker 9 uses device cpu -[2024-07-05 11:08:43,809][25826] Rollout worker 10 uses device cpu -[2024-07-05 11:08:43,809][25826] Rollout worker 11 uses device cpu -[2024-07-05 11:08:43,810][25826] Rollout worker 12 uses device cpu -[2024-07-05 11:08:43,810][25826] Rollout worker 13 uses device cpu -[2024-07-05 11:08:43,810][25826] Rollout worker 14 uses device cpu -[2024-07-05 11:08:43,811][25826] Rollout worker 15 uses device cpu -[2024-07-05 11:08:43,905][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:43,906][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 11:08:43,957][25826] Starting all processes... -[2024-07-05 11:08:43,958][25826] Starting process learner_proc0 -[2024-07-05 11:08:44,007][25826] Starting all processes... -[2024-07-05 11:08:44,011][25826] Starting process inference_proc0-0 -[2024-07-05 11:08:44,011][25826] Starting process rollout_proc0 -[2024-07-05 11:08:44,012][25826] Starting process rollout_proc1 -[2024-07-05 11:08:44,012][25826] Starting process rollout_proc2 -[2024-07-05 11:08:44,013][25826] Starting process rollout_proc3 -[2024-07-05 11:08:44,013][25826] Starting process rollout_proc4 -[2024-07-05 11:08:44,017][25826] Starting process rollout_proc5 -[2024-07-05 11:08:44,017][25826] Starting process rollout_proc6 -[2024-07-05 11:08:44,017][25826] Starting process rollout_proc7 -[2024-07-05 11:08:44,044][25826] Starting process rollout_proc8 -[2024-07-05 11:08:44,045][25826] Starting process rollout_proc9 -[2024-07-05 11:08:44,046][25826] Starting process rollout_proc10 -[2024-07-05 11:08:44,047][25826] Starting process rollout_proc11 -[2024-07-05 11:08:44,047][25826] Starting process rollout_proc12 -[2024-07-05 11:08:44,049][25826] Starting process rollout_proc13 -[2024-07-05 11:08:44,052][25826] Starting process rollout_proc14 -[2024-07-05 11:08:44,069][25826] Starting process rollout_proc15 -[2024-07-05 11:08:47,949][29813] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:47,950][29813] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 11:08:48,120][29838] Worker 7 uses CPU cores [7] -[2024-07-05 11:08:48,140][29840] Worker 5 uses CPU cores [5] -[2024-07-05 11:08:48,172][29834] Worker 0 uses CPU cores [0] -[2024-07-05 11:08:48,196][29864] Worker 15 uses CPU cores [15] -[2024-07-05 11:08:48,220][29863] Worker 14 uses CPU cores [14] -[2024-07-05 11:08:48,232][29841] Worker 4 uses CPU cores [4] -[2024-07-05 11:08:48,240][29845] Worker 10 uses CPU cores [10] -[2024-07-05 11:08:48,291][29837] Worker 2 uses CPU cores [2] -[2024-07-05 11:08:48,300][29842] Worker 8 uses CPU cores [8] -[2024-07-05 11:08:48,309][29843] Worker 9 uses CPU cores [9] -[2024-07-05 11:08:48,355][29835] Worker 1 uses CPU cores [1] -[2024-07-05 11:08:48,356][29833] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:48,357][29833] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 11:08:48,412][29861] Worker 12 uses CPU cores [12] -[2024-07-05 11:08:48,424][29836] Worker 3 uses CPU cores [3] -[2024-07-05 11:08:48,435][29862] Worker 13 uses CPU cores [13] -[2024-07-05 11:08:48,445][29844] Worker 11 uses CPU cores [11] -[2024-07-05 11:08:48,560][29839] Worker 6 uses CPU cores [6] -[2024-07-05 11:08:49,578][29813] Num visible devices: 1 -[2024-07-05 11:08:49,578][29833] Num visible devices: 1 -[2024-07-05 11:08:49,601][29813] Setting fixed seed 200 -[2024-07-05 11:08:49,603][29813] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:49,603][29813] Initializing actor-critic model on device cuda:0 -[2024-07-05 11:08:49,603][29813] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:08:49,604][29813] RunningMeanStd input shape: (1,) -[2024-07-05 11:08:49,613][29813] ConvEncoder: input_channels=3 -[2024-07-05 11:08:49,672][29813] Conv encoder output size: 512 -[2024-07-05 11:08:49,672][29813] Policy head output size: 512 -[2024-07-05 11:08:49,680][29813] Created Actor Critic model with architecture: -[2024-07-05 11:08:49,680][29813] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 11:08:49,771][29813] Using optimizer -[2024-07-05 11:08:50,384][29813] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004886_20021248.pth... -[2024-07-05 11:08:50,401][29813] Loading model from checkpoint -[2024-07-05 11:08:50,402][29813] Loaded experiment state at self.train_step=4886, self.env_steps=20021248 -[2024-07-05 11:08:50,402][29813] Initialized policy 0 weights for model version 4886 -[2024-07-05 11:08:50,404][29813] LearnerWorker_p0 finished initialization! -[2024-07-05 11:08:50,404][29813] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:08:50,466][29833] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:08:50,466][29833] RunningMeanStd input shape: (1,) -[2024-07-05 11:08:50,474][29833] ConvEncoder: input_channels=3 -[2024-07-05 11:08:50,527][29833] Conv encoder output size: 512 -[2024-07-05 11:08:50,527][29833] Policy head output size: 512 -[2024-07-05 11:08:50,559][25826] Inference worker 0-0 is ready! -[2024-07-05 11:08:50,560][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 11:08:50,607][29844] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,607][29862] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,609][29834] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,609][29864] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,611][29836] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,611][29843] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,612][29837] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,612][29845] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,613][29838] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,612][29835] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,618][29839] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,618][29841] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,619][29840] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,623][29863] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,626][29842] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:50,644][29861] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:08:51,216][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 20021248. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 11:08:51,229][29834] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,231][29844] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,231][29836] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,232][29863] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,232][29843] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,233][29841] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,235][29862] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,235][29861] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,386][29862] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,386][29844] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,413][29840] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,437][29836] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,443][29863] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,455][29864] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,468][29837] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,558][29835] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,573][29844] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,578][29862] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,605][29840] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,616][29864] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,636][29837] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,681][29841] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,681][29861] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,690][29845] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,719][29835] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,750][29843] Decorrelating experience for 32 frames... -[2024-07-05 11:08:51,772][29836] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,782][29839] Decorrelating experience for 0 frames... -[2024-07-05 11:08:51,844][29841] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,861][29861] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,896][29864] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,920][29843] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,982][29862] Decorrelating experience for 96 frames... -[2024-07-05 11:08:51,993][29835] Decorrelating experience for 64 frames... -[2024-07-05 11:08:51,998][29834] Decorrelating experience for 32 frames... -[2024-07-05 11:08:52,066][29837] Decorrelating experience for 64 frames... -[2024-07-05 11:08:52,073][29863] Decorrelating experience for 64 frames... -[2024-07-05 11:08:52,080][29841] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,089][29840] Decorrelating experience for 64 frames... -[2024-07-05 11:08:52,138][29864] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,184][29861] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,187][29842] Decorrelating experience for 0 frames... -[2024-07-05 11:08:52,247][29837] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,300][29863] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,334][29840] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,347][29862] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,347][29842] Decorrelating experience for 32 frames... -[2024-07-05 11:08:52,420][29844] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,420][29841] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,528][29861] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,530][29835] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,542][29842] Decorrelating experience for 64 frames... -[2024-07-05 11:08:52,550][29838] Decorrelating experience for 0 frames... -[2024-07-05 11:08:52,576][29863] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,625][29840] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,645][29843] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,678][29864] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,725][29862] Decorrelating experience for 160 frames... -[2024-07-05 11:08:52,745][29844] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,764][29836] Decorrelating experience for 96 frames... -[2024-07-05 11:08:52,767][29839] Decorrelating experience for 32 frames... -[2024-07-05 11:08:52,780][29838] Decorrelating experience for 32 frames... -[2024-07-05 11:08:52,828][29841] Decorrelating experience for 160 frames... -[2024-07-05 11:08:52,855][29845] Decorrelating experience for 32 frames... -[2024-07-05 11:08:52,897][29843] Decorrelating experience for 128 frames... -[2024-07-05 11:08:52,927][29864] Decorrelating experience for 160 frames... -[2024-07-05 11:08:52,935][29834] Decorrelating experience for 64 frames... -[2024-07-05 11:08:52,953][29840] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,005][29838] Decorrelating experience for 64 frames... -[2024-07-05 11:08:53,006][29863] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,039][29861] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,084][29844] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,096][29841] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,106][29862] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,139][29842] Decorrelating experience for 96 frames... -[2024-07-05 11:08:53,170][29837] Decorrelating experience for 128 frames... -[2024-07-05 11:08:53,177][29836] Decorrelating experience for 128 frames... -[2024-07-05 11:08:53,218][29845] Decorrelating experience for 64 frames... -[2024-07-05 11:08:53,229][29864] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,306][29834] Decorrelating experience for 96 frames... -[2024-07-05 11:08:53,327][29862] Decorrelating experience for 224 frames... -[2024-07-05 11:08:53,358][29863] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,366][29841] Decorrelating experience for 224 frames... -[2024-07-05 11:08:53,390][29835] Decorrelating experience for 128 frames... -[2024-07-05 11:08:53,397][29838] Decorrelating experience for 96 frames... -[2024-07-05 11:08:53,397][29837] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,533][29843] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,554][29845] Decorrelating experience for 96 frames... -[2024-07-05 11:08:53,612][29836] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,626][29844] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,634][29864] Decorrelating experience for 224 frames... -[2024-07-05 11:08:53,645][29863] Decorrelating experience for 224 frames... -[2024-07-05 11:08:53,650][29838] Decorrelating experience for 128 frames... -[2024-07-05 11:08:53,694][29837] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,842][29861] Decorrelating experience for 192 frames... -[2024-07-05 11:08:53,860][29838] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,864][29835] Decorrelating experience for 160 frames... -[2024-07-05 11:08:53,868][29844] Decorrelating experience for 224 frames... -[2024-07-05 11:08:53,901][29834] Decorrelating experience for 128 frames... -[2024-07-05 11:08:53,908][29843] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,046][29837] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,096][29838] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,115][29842] Decorrelating experience for 128 frames... -[2024-07-05 11:08:54,124][29861] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,149][29845] Decorrelating experience for 128 frames... -[2024-07-05 11:08:54,227][29843] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,266][29834] Decorrelating experience for 160 frames... -[2024-07-05 11:08:54,354][29840] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,368][29835] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,391][29842] Decorrelating experience for 160 frames... -[2024-07-05 11:08:54,510][29838] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,570][29845] Decorrelating experience for 160 frames... -[2024-07-05 11:08:54,641][29813] Signal inference workers to stop experience collection... -[2024-07-05 11:08:54,646][29833] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 11:08:54,647][29834] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,655][29840] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,692][29835] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,715][29836] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,782][29839] Decorrelating experience for 64 frames... -[2024-07-05 11:08:54,811][29845] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,882][29842] Decorrelating experience for 192 frames... -[2024-07-05 11:08:54,932][29836] Decorrelating experience for 224 frames... -[2024-07-05 11:08:54,964][29839] Decorrelating experience for 96 frames... -[2024-07-05 11:08:54,992][29834] Decorrelating experience for 224 frames... -[2024-07-05 11:08:55,028][29845] Decorrelating experience for 224 frames... -[2024-07-05 11:08:55,113][29842] Decorrelating experience for 224 frames... -[2024-07-05 11:08:55,226][29839] Decorrelating experience for 128 frames... -[2024-07-05 11:08:55,422][29839] Decorrelating experience for 160 frames... -[2024-07-05 11:08:55,630][29839] Decorrelating experience for 192 frames... -[2024-07-05 11:08:55,849][29839] Decorrelating experience for 224 frames... -[2024-07-05 11:08:55,926][29813] Signal inference workers to resume experience collection... -[2024-07-05 11:08:55,927][29833] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 11:08:56,216][25826] Fps is (10 sec: 1638.5, 60 sec: 1638.5, 300 sec: 1638.5). Total num frames: 20029440. Throughput: 0: 10.4. Samples: 52. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:08:56,216][25826] Avg episode reward: [(0, '2.025')] -[2024-07-05 11:08:57,800][29833] Updated weights for policy 0, policy_version 4896 (0.0099) -[2024-07-05 11:08:59,569][29833] Updated weights for policy 0, policy_version 4906 (0.0008) -[2024-07-05 11:09:01,216][25826] Fps is (10 sec: 23757.3, 60 sec: 23757.3, 300 sec: 23757.3). Total num frames: 20258816. Throughput: 0: 4296.1. Samples: 42960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:01,217][25826] Avg episode reward: [(0, '32.492')] -[2024-07-05 11:09:01,298][29833] Updated weights for policy 0, policy_version 4916 (0.0013) -[2024-07-05 11:09:02,931][29833] Updated weights for policy 0, policy_version 4926 (0.0008) -[2024-07-05 11:09:03,899][25826] Heartbeat connected on Batcher_0 -[2024-07-05 11:09:03,911][25826] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 11:09:03,913][25826] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 11:09:03,915][25826] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 11:09:03,922][25826] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 11:09:03,922][25826] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 11:09:03,925][25826] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 11:09:03,927][25826] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 11:09:03,934][25826] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 11:09:03,938][25826] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 11:09:03,939][25826] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 11:09:03,940][25826] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 11:09:03,941][25826] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 11:09:03,945][25826] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 11:09:03,947][25826] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 11:09:03,950][25826] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 11:09:03,953][25826] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 11:09:03,961][25826] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 11:09:03,975][25826] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 11:09:04,664][29833] Updated weights for policy 0, policy_version 4936 (0.0011) -[2024-07-05 11:09:06,216][25826] Fps is (10 sec: 46694.3, 60 sec: 31676.1, 300 sec: 31676.1). Total num frames: 20496384. Throughput: 0: 7701.7. Samples: 115524. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:06,217][25826] Avg episode reward: [(0, '32.579')] -[2024-07-05 11:09:06,385][29833] Updated weights for policy 0, policy_version 4946 (0.0008) -[2024-07-05 11:09:08,087][29833] Updated weights for policy 0, policy_version 4956 (0.0010) -[2024-07-05 11:09:09,769][29833] Updated weights for policy 0, policy_version 4966 (0.0008) -[2024-07-05 11:09:11,216][25826] Fps is (10 sec: 48331.9, 60 sec: 36044.8, 300 sec: 36044.8). Total num frames: 20742144. Throughput: 0: 7593.0. Samples: 151860. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:09:11,217][25826] Avg episode reward: [(0, '29.500')] -[2024-07-05 11:09:11,393][29833] Updated weights for policy 0, policy_version 4976 (0.0011) -[2024-07-05 11:09:13,083][29833] Updated weights for policy 0, policy_version 4986 (0.0011) -[2024-07-05 11:09:14,797][29833] Updated weights for policy 0, policy_version 4996 (0.0008) -[2024-07-05 11:09:16,216][25826] Fps is (10 sec: 49151.1, 60 sec: 38666.2, 300 sec: 38666.2). Total num frames: 20987904. Throughput: 0: 9014.6. Samples: 225364. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:09:16,217][25826] Avg episode reward: [(0, '34.356')] -[2024-07-05 11:09:16,495][29833] Updated weights for policy 0, policy_version 5006 (0.0008) -[2024-07-05 11:09:18,144][29833] Updated weights for policy 0, policy_version 5016 (0.0008) -[2024-07-05 11:09:19,888][29833] Updated weights for policy 0, policy_version 5026 (0.0009) -[2024-07-05 11:09:21,216][25826] Fps is (10 sec: 48333.3, 60 sec: 40141.0, 300 sec: 40141.0). Total num frames: 21225472. Throughput: 0: 9905.4. Samples: 297160. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:09:21,217][25826] Avg episode reward: [(0, '35.042')] -[2024-07-05 11:09:21,221][29813] Saving new best policy, reward=35.042! -[2024-07-05 11:09:21,611][29833] Updated weights for policy 0, policy_version 5036 (0.0008) -[2024-07-05 11:09:23,314][29833] Updated weights for policy 0, policy_version 5046 (0.0007) -[2024-07-05 11:09:25,017][29833] Updated weights for policy 0, policy_version 5056 (0.0009) -[2024-07-05 11:09:26,216][25826] Fps is (10 sec: 48333.5, 60 sec: 41428.3, 300 sec: 41428.3). Total num frames: 21471232. Throughput: 0: 9528.4. Samples: 333492. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:26,217][25826] Avg episode reward: [(0, '34.692')] -[2024-07-05 11:09:26,686][29833] Updated weights for policy 0, policy_version 5066 (0.0008) -[2024-07-05 11:09:28,418][29833] Updated weights for policy 0, policy_version 5076 (0.0009) -[2024-07-05 11:09:30,124][29833] Updated weights for policy 0, policy_version 5086 (0.0008) -[2024-07-05 11:09:31,216][25826] Fps is (10 sec: 48332.9, 60 sec: 42188.9, 300 sec: 42188.9). Total num frames: 21708800. Throughput: 0: 10136.9. Samples: 405476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:31,218][25826] Avg episode reward: [(0, '34.679')] -[2024-07-05 11:09:31,849][29833] Updated weights for policy 0, policy_version 5096 (0.0008) -[2024-07-05 11:09:33,519][29833] Updated weights for policy 0, policy_version 5106 (0.0011) -[2024-07-05 11:09:35,231][29833] Updated weights for policy 0, policy_version 5116 (0.0008) -[2024-07-05 11:09:36,216][25826] Fps is (10 sec: 47513.7, 60 sec: 42780.6, 300 sec: 42780.6). Total num frames: 21946368. Throughput: 0: 10621.5. Samples: 477968. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:09:36,217][25826] Avg episode reward: [(0, '37.249')] -[2024-07-05 11:09:36,235][29813] Saving new best policy, reward=37.249! -[2024-07-05 11:09:36,915][29833] Updated weights for policy 0, policy_version 5126 (0.0011) -[2024-07-05 11:09:38,612][29833] Updated weights for policy 0, policy_version 5136 (0.0009) -[2024-07-05 11:09:40,305][29833] Updated weights for policy 0, policy_version 5146 (0.0013) -[2024-07-05 11:09:41,216][25826] Fps is (10 sec: 48332.3, 60 sec: 43417.6, 300 sec: 43417.6). Total num frames: 22192128. Throughput: 0: 11428.1. Samples: 514316. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:41,217][25826] Avg episode reward: [(0, '34.670')] -[2024-07-05 11:09:41,983][29833] Updated weights for policy 0, policy_version 5156 (0.0008) -[2024-07-05 11:09:43,662][29833] Updated weights for policy 0, policy_version 5166 (0.0008) -[2024-07-05 11:09:45,309][29833] Updated weights for policy 0, policy_version 5176 (0.0009) -[2024-07-05 11:09:46,216][25826] Fps is (10 sec: 49151.5, 60 sec: 43938.9, 300 sec: 43938.9). Total num frames: 22437888. Throughput: 0: 12098.9. Samples: 587412. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:46,217][25826] Avg episode reward: [(0, '33.198')] -[2024-07-05 11:09:46,985][29833] Updated weights for policy 0, policy_version 5186 (0.0011) -[2024-07-05 11:09:48,623][29833] Updated weights for policy 0, policy_version 5196 (0.0007) -[2024-07-05 11:09:50,316][29833] Updated weights for policy 0, policy_version 5206 (0.0008) -[2024-07-05 11:09:51,216][25826] Fps is (10 sec: 49151.9, 60 sec: 44373.3, 300 sec: 44373.3). Total num frames: 22683648. Throughput: 0: 12120.7. Samples: 660956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:09:51,217][25826] Avg episode reward: [(0, '34.530')] -[2024-07-05 11:09:51,983][29833] Updated weights for policy 0, policy_version 5216 (0.0008) -[2024-07-05 11:09:53,706][29833] Updated weights for policy 0, policy_version 5226 (0.0007) -[2024-07-05 11:09:55,369][29833] Updated weights for policy 0, policy_version 5236 (0.0008) -[2024-07-05 11:09:56,216][25826] Fps is (10 sec: 49152.1, 60 sec: 48332.7, 300 sec: 44741.0). Total num frames: 22929408. Throughput: 0: 12125.0. Samples: 697484. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:09:56,217][25826] Avg episode reward: [(0, '34.537')] -[2024-07-05 11:09:57,077][29833] Updated weights for policy 0, policy_version 5246 (0.0010) -[2024-07-05 11:09:58,801][29833] Updated weights for policy 0, policy_version 5256 (0.0009) -[2024-07-05 11:10:00,485][29833] Updated weights for policy 0, policy_version 5266 (0.0008) -[2024-07-05 11:10:01,216][25826] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 44939.0). Total num frames: 23166976. Throughput: 0: 12105.7. Samples: 770120. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:10:01,217][25826] Avg episode reward: [(0, '35.112')] -[2024-07-05 11:10:02,140][29833] Updated weights for policy 0, policy_version 5276 (0.0007) -[2024-07-05 11:10:03,832][29833] Updated weights for policy 0, policy_version 5286 (0.0007) -[2024-07-05 11:10:05,551][29833] Updated weights for policy 0, policy_version 5296 (0.0008) -[2024-07-05 11:10:06,216][25826] Fps is (10 sec: 48333.3, 60 sec: 48605.9, 300 sec: 45219.9). Total num frames: 23412736. Throughput: 0: 12126.3. Samples: 842844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:10:06,217][25826] Avg episode reward: [(0, '31.711')] -[2024-07-05 11:10:07,200][29833] Updated weights for policy 0, policy_version 5306 (0.0011) -[2024-07-05 11:10:08,888][29833] Updated weights for policy 0, policy_version 5316 (0.0010) -[2024-07-05 11:10:10,592][29833] Updated weights for policy 0, policy_version 5326 (0.0007) -[2024-07-05 11:10:11,216][25826] Fps is (10 sec: 48333.0, 60 sec: 48469.5, 300 sec: 45363.3). Total num frames: 23650304. Throughput: 0: 12122.9. Samples: 879024. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:10:11,217][25826] Avg episode reward: [(0, '35.476')] -[2024-07-05 11:10:12,267][29833] Updated weights for policy 0, policy_version 5336 (0.0010) -[2024-07-05 11:10:13,989][29833] Updated weights for policy 0, policy_version 5346 (0.0008) -[2024-07-05 11:10:15,689][29833] Updated weights for policy 0, policy_version 5356 (0.0011) -[2024-07-05 11:10:16,216][25826] Fps is (10 sec: 47513.2, 60 sec: 48332.9, 300 sec: 45489.7). Total num frames: 23887872. Throughput: 0: 12132.7. Samples: 951448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:10:16,217][25826] Avg episode reward: [(0, '37.491')] -[2024-07-05 11:10:16,243][29813] Saving new best policy, reward=37.491! -[2024-07-05 11:10:17,432][29833] Updated weights for policy 0, policy_version 5366 (0.0009) -[2024-07-05 11:10:19,104][29833] Updated weights for policy 0, policy_version 5376 (0.0008) -[2024-07-05 11:10:20,788][29833] Updated weights for policy 0, policy_version 5386 (0.0008) -[2024-07-05 11:10:21,216][25826] Fps is (10 sec: 48332.4, 60 sec: 48469.3, 300 sec: 45693.2). Total num frames: 24133632. Throughput: 0: 12136.2. Samples: 1024100. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:10:21,217][25826] Avg episode reward: [(0, '33.566')] -[2024-07-05 11:10:22,443][29833] Updated weights for policy 0, policy_version 5396 (0.0011) -[2024-07-05 11:10:24,138][29833] Updated weights for policy 0, policy_version 5406 (0.0007) -[2024-07-05 11:10:25,832][29833] Updated weights for policy 0, policy_version 5416 (0.0009) -[2024-07-05 11:10:26,216][25826] Fps is (10 sec: 49152.6, 60 sec: 48469.4, 300 sec: 45875.3). Total num frames: 24379392. Throughput: 0: 12143.3. Samples: 1060764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:10:26,217][25826] Avg episode reward: [(0, '36.782')] -[2024-07-05 11:10:27,511][29833] Updated weights for policy 0, policy_version 5426 (0.0007) -[2024-07-05 11:10:29,191][29833] Updated weights for policy 0, policy_version 5436 (0.0011) -[2024-07-05 11:10:30,927][29833] Updated weights for policy 0, policy_version 5446 (0.0008) -[2024-07-05 11:10:31,216][25826] Fps is (10 sec: 48333.4, 60 sec: 48469.4, 300 sec: 45957.2). Total num frames: 24616960. Throughput: 0: 12128.0. Samples: 1133168. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:10:31,217][25826] Avg episode reward: [(0, '35.240')] -[2024-07-05 11:10:32,620][29833] Updated weights for policy 0, policy_version 5456 (0.0008) -[2024-07-05 11:10:34,315][29833] Updated weights for policy 0, policy_version 5466 (0.0009) -[2024-07-05 11:10:36,003][29833] Updated weights for policy 0, policy_version 5476 (0.0008) -[2024-07-05 11:10:36,216][25826] Fps is (10 sec: 48332.1, 60 sec: 48605.8, 300 sec: 46109.3). Total num frames: 24862720. Throughput: 0: 12113.3. Samples: 1206052. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:10:36,217][25826] Avg episode reward: [(0, '36.431')] -[2024-07-05 11:10:37,660][29833] Updated weights for policy 0, policy_version 5486 (0.0008) -[2024-07-05 11:10:39,325][29833] Updated weights for policy 0, policy_version 5496 (0.0007) -[2024-07-05 11:10:41,035][29833] Updated weights for policy 0, policy_version 5506 (0.0007) -[2024-07-05 11:10:41,216][25826] Fps is (10 sec: 49151.5, 60 sec: 48605.9, 300 sec: 46247.6). Total num frames: 25108480. Throughput: 0: 12112.5. Samples: 1242544. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:10:41,217][25826] Avg episode reward: [(0, '35.331')] -[2024-07-05 11:10:41,221][29813] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000005507_25108480.pth... -[2024-07-05 11:10:41,293][29813] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth -[2024-07-05 11:10:42,706][29833] Updated weights for policy 0, policy_version 5516 (0.0010) -[2024-07-05 11:10:44,405][29833] Updated weights for policy 0, policy_version 5526 (0.0008) -[2024-07-05 11:10:46,140][29833] Updated weights for policy 0, policy_version 5536 (0.0008) -[2024-07-05 11:10:46,216][25826] Fps is (10 sec: 48332.9, 60 sec: 48469.4, 300 sec: 46302.6). Total num frames: 25346048. Throughput: 0: 12108.1. Samples: 1314984. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:10:46,217][25826] Avg episode reward: [(0, '36.214')] -[2024-07-05 11:10:47,846][29833] Updated weights for policy 0, policy_version 5546 (0.0008) -[2024-07-05 11:10:49,498][29833] Updated weights for policy 0, policy_version 5556 (0.0009) -[2024-07-05 11:10:51,216][25826] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 46421.4). Total num frames: 25591808. Throughput: 0: 12113.2. Samples: 1387940. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:10:51,217][25826] Avg episode reward: [(0, '32.639')] -[2024-07-05 11:10:51,221][29833] Updated weights for policy 0, policy_version 5566 (0.0010) -[2024-07-05 11:10:52,848][29833] Updated weights for policy 0, policy_version 5576 (0.0011) -[2024-07-05 11:10:54,513][29833] Updated weights for policy 0, policy_version 5586 (0.0008) -[2024-07-05 11:10:56,216][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 46465.0). Total num frames: 25829376. Throughput: 0: 12119.6. Samples: 1424408. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:10:56,217][25826] Avg episode reward: [(0, '32.414')] -[2024-07-05 11:10:56,224][29833] Updated weights for policy 0, policy_version 5596 (0.0009) -[2024-07-05 11:10:57,967][29833] Updated weights for policy 0, policy_version 5606 (0.0010) -[2024-07-05 11:10:59,657][29833] Updated weights for policy 0, policy_version 5616 (0.0008) -[2024-07-05 11:11:01,216][25826] Fps is (10 sec: 48331.8, 60 sec: 48469.2, 300 sec: 46568.4). Total num frames: 26075136. Throughput: 0: 12120.1. Samples: 1496856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:11:01,217][25826] Avg episode reward: [(0, '37.009')] -[2024-07-05 11:11:01,279][29833] Updated weights for policy 0, policy_version 5626 (0.0009) -[2024-07-05 11:11:02,922][29833] Updated weights for policy 0, policy_version 5636 (0.0008) -[2024-07-05 11:11:04,557][29833] Updated weights for policy 0, policy_version 5646 (0.0007) -[2024-07-05 11:11:06,216][25826] Fps is (10 sec: 49152.6, 60 sec: 48469.3, 300 sec: 46664.1). Total num frames: 26320896. Throughput: 0: 12150.2. Samples: 1570860. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:11:06,216][25826] Avg episode reward: [(0, '33.307')] -[2024-07-05 11:11:06,250][29833] Updated weights for policy 0, policy_version 5656 (0.0008) -[2024-07-05 11:11:07,930][29833] Updated weights for policy 0, policy_version 5666 (0.0007) -[2024-07-05 11:11:09,550][29833] Updated weights for policy 0, policy_version 5676 (0.0009) -[2024-07-05 11:11:11,216][25826] Fps is (10 sec: 49152.7, 60 sec: 48605.8, 300 sec: 46752.9). Total num frames: 26566656. Throughput: 0: 12157.8. Samples: 1607868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:11:11,217][25826] Avg episode reward: [(0, '36.185')] -[2024-07-05 11:11:11,241][29833] Updated weights for policy 0, policy_version 5686 (0.0008) -[2024-07-05 11:11:12,931][29833] Updated weights for policy 0, policy_version 5696 (0.0010) -[2024-07-05 11:11:14,594][29833] Updated weights for policy 0, policy_version 5706 (0.0009) -[2024-07-05 11:11:16,216][25826] Fps is (10 sec: 49151.6, 60 sec: 48742.4, 300 sec: 46835.7). Total num frames: 26812416. Throughput: 0: 12189.4. Samples: 1681692. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:11:16,217][25826] Avg episode reward: [(0, '34.934')] -[2024-07-05 11:11:16,275][29833] Updated weights for policy 0, policy_version 5716 (0.0010) -[2024-07-05 11:11:17,961][29833] Updated weights for policy 0, policy_version 5726 (0.0008) -[2024-07-05 11:11:19,631][29833] Updated weights for policy 0, policy_version 5736 (0.0008) -[2024-07-05 11:11:21,216][25826] Fps is (10 sec: 49152.1, 60 sec: 48742.4, 300 sec: 46912.9). Total num frames: 27058176. Throughput: 0: 12184.4. Samples: 1754348. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:21,217][25826] Avg episode reward: [(0, '36.772')] -[2024-07-05 11:11:21,362][29833] Updated weights for policy 0, policy_version 5746 (0.0009) -[2024-07-05 11:11:23,024][29833] Updated weights for policy 0, policy_version 5756 (0.0012) -[2024-07-05 11:11:24,685][29833] Updated weights for policy 0, policy_version 5766 (0.0007) -[2024-07-05 11:11:26,216][25826] Fps is (10 sec: 49152.0, 60 sec: 48742.3, 300 sec: 46985.1). Total num frames: 27303936. Throughput: 0: 12190.3. Samples: 1791108. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:26,218][25826] Avg episode reward: [(0, '32.935')] -[2024-07-05 11:11:26,346][29833] Updated weights for policy 0, policy_version 5776 (0.0008) -[2024-07-05 11:11:28,021][29833] Updated weights for policy 0, policy_version 5786 (0.0008) -[2024-07-05 11:11:29,705][29833] Updated weights for policy 0, policy_version 5796 (0.0008) -[2024-07-05 11:11:31,216][25826] Fps is (10 sec: 49151.9, 60 sec: 48878.8, 300 sec: 47052.8). Total num frames: 27549696. Throughput: 0: 12218.9. Samples: 1864836. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:11:31,217][25826] Avg episode reward: [(0, '34.976')] -[2024-07-05 11:11:31,357][29833] Updated weights for policy 0, policy_version 5806 (0.0010) -[2024-07-05 11:11:33,007][29833] Updated weights for policy 0, policy_version 5816 (0.0008) -[2024-07-05 11:11:34,659][29833] Updated weights for policy 0, policy_version 5826 (0.0007) -[2024-07-05 11:11:36,216][25826] Fps is (10 sec: 49152.3, 60 sec: 48879.0, 300 sec: 47116.5). Total num frames: 27795456. Throughput: 0: 12239.3. Samples: 1938708. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:36,217][25826] Avg episode reward: [(0, '39.214')] -[2024-07-05 11:11:36,218][29813] Saving new best policy, reward=39.214! -[2024-07-05 11:11:36,354][29833] Updated weights for policy 0, policy_version 5836 (0.0008) -[2024-07-05 11:11:38,050][29833] Updated weights for policy 0, policy_version 5846 (0.0009) -[2024-07-05 11:11:39,745][29833] Updated weights for policy 0, policy_version 5856 (0.0008) -[2024-07-05 11:11:41,216][25826] Fps is (10 sec: 49152.1, 60 sec: 48878.9, 300 sec: 47176.3). Total num frames: 28041216. Throughput: 0: 12226.3. Samples: 1974592. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:41,217][25826] Avg episode reward: [(0, '36.431')] -[2024-07-05 11:11:41,381][29833] Updated weights for policy 0, policy_version 5866 (0.0010) -[2024-07-05 11:11:43,041][29833] Updated weights for policy 0, policy_version 5876 (0.0007) -[2024-07-05 11:11:44,758][29833] Updated weights for policy 0, policy_version 5886 (0.0007) -[2024-07-05 11:11:46,216][25826] Fps is (10 sec: 49151.8, 60 sec: 49015.5, 300 sec: 47232.8). Total num frames: 28286976. Throughput: 0: 12258.4. Samples: 2048480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:46,217][25826] Avg episode reward: [(0, '36.150')] -[2024-07-05 11:11:46,367][29833] Updated weights for policy 0, policy_version 5896 (0.0008) -[2024-07-05 11:11:48,039][29833] Updated weights for policy 0, policy_version 5906 (0.0009) -[2024-07-05 11:11:49,745][29833] Updated weights for policy 0, policy_version 5916 (0.0010) -[2024-07-05 11:11:51,216][25826] Fps is (10 sec: 48332.9, 60 sec: 48878.9, 300 sec: 47240.6). Total num frames: 28524544. Throughput: 0: 12253.4. Samples: 2122264. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:51,217][25826] Avg episode reward: [(0, '36.691')] -[2024-07-05 11:11:51,411][29833] Updated weights for policy 0, policy_version 5926 (0.0008) -[2024-07-05 11:11:53,036][29833] Updated weights for policy 0, policy_version 5936 (0.0008) -[2024-07-05 11:11:54,667][29833] Updated weights for policy 0, policy_version 5946 (0.0008) -[2024-07-05 11:11:56,216][25826] Fps is (10 sec: 49152.3, 60 sec: 49152.1, 300 sec: 47336.5). Total num frames: 28778496. Throughput: 0: 12257.0. Samples: 2159432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:11:56,217][25826] Avg episode reward: [(0, '35.821')] -[2024-07-05 11:11:56,299][29833] Updated weights for policy 0, policy_version 5956 (0.0011) -[2024-07-05 11:11:58,007][29833] Updated weights for policy 0, policy_version 5966 (0.0012) -[2024-07-05 11:11:59,658][29833] Updated weights for policy 0, policy_version 5976 (0.0009) -[2024-07-05 11:12:01,217][25826] Fps is (10 sec: 49967.2, 60 sec: 49151.5, 300 sec: 47384.1). Total num frames: 29024256. Throughput: 0: 12274.1. Samples: 2234036. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:12:01,218][25826] Avg episode reward: [(0, '33.406')] -[2024-07-05 11:12:01,294][29833] Updated weights for policy 0, policy_version 5986 (0.0008) -[2024-07-05 11:12:02,925][29833] Updated weights for policy 0, policy_version 5996 (0.0008) -[2024-07-05 11:12:04,590][29833] Updated weights for policy 0, policy_version 6006 (0.0008) -[2024-07-05 11:12:06,216][25826] Fps is (10 sec: 49151.6, 60 sec: 49151.9, 300 sec: 47429.6). Total num frames: 29270016. Throughput: 0: 12313.5. Samples: 2308456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:12:06,217][25826] Avg episode reward: [(0, '36.362')] -[2024-07-05 11:12:06,265][29833] Updated weights for policy 0, policy_version 6016 (0.0008) -[2024-07-05 11:12:07,959][29833] Updated weights for policy 0, policy_version 6026 (0.0008) -[2024-07-05 11:12:09,597][29833] Updated weights for policy 0, policy_version 6036 (0.0010) -[2024-07-05 11:12:11,216][25826] Fps is (10 sec: 49155.8, 60 sec: 49152.0, 300 sec: 47472.7). Total num frames: 29515776. Throughput: 0: 12311.4. Samples: 2345120. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:12:11,217][25826] Avg episode reward: [(0, '38.716')] -[2024-07-05 11:12:11,243][29833] Updated weights for policy 0, policy_version 6046 (0.0011) -[2024-07-05 11:12:12,889][29833] Updated weights for policy 0, policy_version 6056 (0.0007) -[2024-07-05 11:12:14,552][29833] Updated weights for policy 0, policy_version 6066 (0.0009) -[2024-07-05 11:12:16,197][29833] Updated weights for policy 0, policy_version 6076 (0.0008) -[2024-07-05 11:12:16,216][25826] Fps is (10 sec: 49971.4, 60 sec: 49288.5, 300 sec: 47553.6). Total num frames: 29769728. Throughput: 0: 12329.8. Samples: 2419676. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:12:16,217][25826] Avg episode reward: [(0, '37.614')] -[2024-07-05 11:12:17,860][29833] Updated weights for policy 0, policy_version 6086 (0.0010) -[2024-07-05 11:12:19,623][29833] Updated weights for policy 0, policy_version 6096 (0.0008) -[2024-07-05 11:12:21,216][25826] Fps is (10 sec: 48333.1, 60 sec: 49015.5, 300 sec: 47513.6). Total num frames: 29999104. Throughput: 0: 12291.3. Samples: 2491816. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:12:21,217][25826] Avg episode reward: [(0, '35.857')] -[2024-07-05 11:12:21,418][29813] Stopping Batcher_0... -[2024-07-05 11:12:21,419][29813] Loop batcher_evt_loop terminating... -[2024-07-05 11:12:21,419][25826] Component Batcher_0 stopped! -[2024-07-05 11:12:21,421][29813] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006106_30015488.pth... -[2024-07-05 11:12:21,423][29833] Updated weights for policy 0, policy_version 6106 (0.0012) -[2024-07-05 11:12:21,442][29843] Stopping RolloutWorker_w9... -[2024-07-05 11:12:21,441][25826] Component RolloutWorker_w9 stopped! -[2024-07-05 11:12:21,442][29862] Stopping RolloutWorker_w13... -[2024-07-05 11:12:21,443][29843] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 11:12:21,443][29862] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 11:12:21,443][29835] Stopping RolloutWorker_w1... -[2024-07-05 11:12:21,444][29835] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 11:12:21,444][29836] Stopping RolloutWorker_w3... -[2024-07-05 11:12:21,444][29840] Stopping RolloutWorker_w5... -[2024-07-05 11:12:21,445][29836] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 11:12:21,445][29840] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 11:12:21,443][25826] Component RolloutWorker_w13 stopped! -[2024-07-05 11:12:21,445][29839] Stopping RolloutWorker_w6... -[2024-07-05 11:12:21,445][29834] Stopping RolloutWorker_w0... -[2024-07-05 11:12:21,446][29839] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 11:12:21,446][29844] Stopping RolloutWorker_w11... -[2024-07-05 11:12:21,446][29834] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 11:12:21,445][25826] Component RolloutWorker_w1 stopped! -[2024-07-05 11:12:21,446][29861] Stopping RolloutWorker_w12... -[2024-07-05 11:12:21,446][29844] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 11:12:21,447][29861] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 11:12:21,447][29838] Stopping RolloutWorker_w7... -[2024-07-05 11:12:21,447][25826] Component RolloutWorker_w3 stopped! -[2024-07-05 11:12:21,448][29838] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 11:12:21,448][25826] Component RolloutWorker_w5 stopped! -[2024-07-05 11:12:21,449][25826] Component RolloutWorker_w6 stopped! -[2024-07-05 11:12:21,449][25826] Component RolloutWorker_w0 stopped! -[2024-07-05 11:12:21,450][25826] Component RolloutWorker_w11 stopped! -[2024-07-05 11:12:21,450][29863] Stopping RolloutWorker_w14... -[2024-07-05 11:12:21,450][25826] Component RolloutWorker_w12 stopped! -[2024-07-05 11:12:21,451][29863] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 11:12:21,451][29842] Stopping RolloutWorker_w8... -[2024-07-05 11:12:21,451][29842] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 11:12:21,451][25826] Component RolloutWorker_w7 stopped! -[2024-07-05 11:12:21,452][25826] Component RolloutWorker_w14 stopped! -[2024-07-05 11:12:21,452][25826] Component RolloutWorker_w8 stopped! -[2024-07-05 11:12:21,455][29837] Stopping RolloutWorker_w2... -[2024-07-05 11:12:21,455][29837] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 11:12:21,455][25826] Component RolloutWorker_w2 stopped! -[2024-07-05 11:12:21,460][29864] Stopping RolloutWorker_w15... -[2024-07-05 11:12:21,460][29864] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 11:12:21,460][25826] Component RolloutWorker_w15 stopped! -[2024-07-05 11:12:21,472][29845] Stopping RolloutWorker_w10... -[2024-07-05 11:12:21,472][29845] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 11:12:21,472][25826] Component RolloutWorker_w10 stopped! -[2024-07-05 11:12:21,485][29833] Weights refcount: 2 0 -[2024-07-05 11:12:21,486][29833] Stopping InferenceWorker_p0-w0... -[2024-07-05 11:12:21,487][29841] Stopping RolloutWorker_w4... -[2024-07-05 11:12:21,487][29833] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 11:12:21,487][25826] Component RolloutWorker_w4 stopped! -[2024-07-05 11:12:21,488][25826] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 11:12:21,487][29841] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 11:12:21,517][29813] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004886_20021248.pth -[2024-07-05 11:12:21,525][29813] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006106_30015488.pth... -[2024-07-05 11:12:21,631][29813] Stopping LearnerWorker_p0... -[2024-07-05 11:12:21,632][29813] Loop learner_proc0_evt_loop terminating... -[2024-07-05 11:12:21,632][25826] Component LearnerWorker_p0 stopped! -[2024-07-05 11:12:21,633][25826] Waiting for process learner_proc0 to stop... -[2024-07-05 11:12:22,931][25826] Waiting for process inference_proc0-0 to join... -[2024-07-05 11:12:22,932][25826] Waiting for process rollout_proc0 to join... -[2024-07-05 11:12:22,933][25826] Waiting for process rollout_proc1 to join... -[2024-07-05 11:12:22,934][25826] Waiting for process rollout_proc2 to join... -[2024-07-05 11:12:22,934][25826] Waiting for process rollout_proc3 to join... -[2024-07-05 11:12:22,934][25826] Waiting for process rollout_proc4 to join... -[2024-07-05 11:12:22,935][25826] Waiting for process rollout_proc5 to join... -[2024-07-05 11:12:22,935][25826] Waiting for process rollout_proc6 to join... -[2024-07-05 11:12:22,935][25826] Waiting for process rollout_proc7 to join... -[2024-07-05 11:12:22,936][25826] Waiting for process rollout_proc8 to join... -[2024-07-05 11:12:22,936][25826] Waiting for process rollout_proc9 to join... -[2024-07-05 11:12:22,937][25826] Waiting for process rollout_proc10 to join... -[2024-07-05 11:12:22,937][25826] Waiting for process rollout_proc11 to join... -[2024-07-05 11:12:22,937][25826] Waiting for process rollout_proc12 to join... -[2024-07-05 11:12:22,938][25826] Waiting for process rollout_proc13 to join... -[2024-07-05 11:12:22,938][25826] Waiting for process rollout_proc14 to join... -[2024-07-05 11:12:22,939][25826] Waiting for process rollout_proc15 to join... -[2024-07-05 11:12:22,939][25826] Batcher 0 profile tree view: -batching: 14.8929, releasing_batches: 0.0282 -[2024-07-05 11:12:22,940][25826] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0000 - wait_policy_total: 11.3309 -update_model: 3.1492 - weight_update: 0.0012 -one_step: 0.0028 - handle_policy_step: 188.4823 - deserialize: 14.9643, stack: 1.0768, obs_to_device_normalize: 44.9296, forward: 87.6885, send_messages: 9.3687 - prepare_outputs: 24.0666 - to_cpu: 14.6811 -[2024-07-05 11:12:22,940][25826] Learner 0 profile tree view: -misc: 0.0057, prepare_batch: 20.4061 -train: 44.2431 - epoch_init: 0.0038, minibatch_init: 0.0054, losses_postprocess: 0.2628, kl_divergence: 0.2776, after_optimizer: 0.3583 - calculate_losses: 15.6467 - losses_init: 0.0021, forward_head: 0.7716, bptt_initial: 12.2530, tail: 0.5449, advantages_returns: 0.1489, losses: 0.8499 - bptt: 0.9129 - bptt_forward_core: 0.8746 - update: 27.3009 - clip: 0.8210 -[2024-07-05 11:12:22,940][25826] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.0874, enqueue_policy_requests: 6.0294, env_step: 96.2884, overhead: 9.9223, complete_rollouts: 0.2078 -save_policy_outputs: 7.4682 - split_output_tensors: 3.5055 -[2024-07-05 11:12:22,941][25826] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.0916, enqueue_policy_requests: 6.4977, env_step: 98.8794, overhead: 10.2573, complete_rollouts: 0.2206 -save_policy_outputs: 7.3652 - split_output_tensors: 3.4276 -[2024-07-05 11:12:22,941][25826] Loop Runner_EvtLoop terminating... -[2024-07-05 11:12:22,941][25826] Runner profile tree view: -main_loop: 218.9841 -[2024-07-05 11:12:22,942][25826] Collected {0: 30015488}, FPS: 45639.1 -[2024-07-05 11:12:49,426][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:12:49,428][25826] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 11:12:49,428][25826] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 11:12:49,429][25826] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 11:12:49,429][25826] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:12:49,429][25826] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 11:12:49,429][25826] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:12:49,430][25826] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 11:12:49,430][25826] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 11:12:49,430][25826] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 11:12:49,430][25826] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 11:12:49,431][25826] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 11:12:49,431][25826] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 11:12:49,431][25826] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 11:12:49,432][25826] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 11:12:49,445][25826] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:12:49,447][25826] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:12:49,448][25826] RunningMeanStd input shape: (1,) -[2024-07-05 11:12:49,456][25826] ConvEncoder: input_channels=3 -[2024-07-05 11:12:49,509][25826] Conv encoder output size: 512 -[2024-07-05 11:12:49,510][25826] Policy head output size: 512 -[2024-07-05 11:12:51,187][25826] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006106_30015488.pth... -[2024-07-05 11:12:51,997][25826] Num frames 100... -[2024-07-05 11:12:52,064][25826] Num frames 200... -[2024-07-05 11:12:52,129][25826] Num frames 300... -[2024-07-05 11:12:52,196][25826] Num frames 400... -[2024-07-05 11:12:52,259][25826] Num frames 500... -[2024-07-05 11:12:52,324][25826] Num frames 600... -[2024-07-05 11:12:52,390][25826] Num frames 700... -[2024-07-05 11:12:52,446][25826] Avg episode rewards: #0: 13.040, true rewards: #0: 7.040 -[2024-07-05 11:12:52,448][25826] Avg episode reward: 13.040, avg true_objective: 7.040 -[2024-07-05 11:12:52,512][25826] Num frames 800... -[2024-07-05 11:12:52,578][25826] Num frames 900... -[2024-07-05 11:12:52,643][25826] Num frames 1000... -[2024-07-05 11:12:52,709][25826] Num frames 1100... -[2024-07-05 11:12:52,774][25826] Num frames 1200... -[2024-07-05 11:12:52,840][25826] Num frames 1300... -[2024-07-05 11:12:52,904][25826] Num frames 1400... -[2024-07-05 11:12:52,971][25826] Num frames 1500... -[2024-07-05 11:12:53,034][25826] Num frames 1600... -[2024-07-05 11:12:53,097][25826] Num frames 1700... -[2024-07-05 11:12:53,159][25826] Num frames 1800... -[2024-07-05 11:12:53,221][25826] Num frames 1900... -[2024-07-05 11:12:53,297][25826] Num frames 2000... -[2024-07-05 11:12:53,389][25826] Num frames 2100... -[2024-07-05 11:12:53,489][25826] Avg episode rewards: #0: 23.720, true rewards: #0: 10.720 -[2024-07-05 11:12:53,490][25826] Avg episode reward: 23.720, avg true_objective: 10.720 -[2024-07-05 11:12:53,530][25826] Num frames 2200... -[2024-07-05 11:12:53,591][25826] Num frames 2300... -[2024-07-05 11:12:53,653][25826] Num frames 2400... -[2024-07-05 11:12:53,716][25826] Num frames 2500... -[2024-07-05 11:12:53,781][25826] Num frames 2600... -[2024-07-05 11:12:53,850][25826] Num frames 2700... -[2024-07-05 11:12:53,918][25826] Num frames 2800... -[2024-07-05 11:12:53,984][25826] Num frames 2900... -[2024-07-05 11:12:54,048][25826] Num frames 3000... -[2024-07-05 11:12:54,116][25826] Num frames 3100... -[2024-07-05 11:12:54,194][25826] Num frames 3200... -[2024-07-05 11:12:54,272][25826] Num frames 3300... -[2024-07-05 11:12:54,340][25826] Num frames 3400... -[2024-07-05 11:12:54,407][25826] Num frames 3500... -[2024-07-05 11:12:54,471][25826] Num frames 3600... -[2024-07-05 11:12:54,534][25826] Num frames 3700... -[2024-07-05 11:12:54,598][25826] Num frames 3800... -[2024-07-05 11:12:54,663][25826] Num frames 3900... -[2024-07-05 11:12:54,742][25826] Avg episode rewards: #0: 32.120, true rewards: #0: 13.120 -[2024-07-05 11:12:54,743][25826] Avg episode reward: 32.120, avg true_objective: 13.120 -[2024-07-05 11:12:54,789][25826] Num frames 4000... -[2024-07-05 11:12:54,853][25826] Num frames 4100... -[2024-07-05 11:12:54,914][25826] Num frames 4200... -[2024-07-05 11:12:54,975][25826] Num frames 4300... -[2024-07-05 11:12:55,038][25826] Num frames 4400... -[2024-07-05 11:12:55,101][25826] Num frames 4500... -[2024-07-05 11:12:55,163][25826] Num frames 4600... -[2024-07-05 11:12:55,225][25826] Num frames 4700... -[2024-07-05 11:12:55,287][25826] Num frames 4800... -[2024-07-05 11:12:55,349][25826] Num frames 4900... -[2024-07-05 11:12:55,414][25826] Num frames 5000... -[2024-07-05 11:12:55,478][25826] Num frames 5100... -[2024-07-05 11:12:55,541][25826] Num frames 5200... -[2024-07-05 11:12:55,606][25826] Num frames 5300... -[2024-07-05 11:12:55,669][25826] Num frames 5400... -[2024-07-05 11:12:55,734][25826] Num frames 5500... -[2024-07-05 11:12:55,800][25826] Num frames 5600... -[2024-07-05 11:12:55,864][25826] Num frames 5700... -[2024-07-05 11:12:55,928][25826] Num frames 5800... -[2024-07-05 11:12:55,992][25826] Num frames 5900... -[2024-07-05 11:12:56,053][25826] Num frames 6000... -[2024-07-05 11:12:56,130][25826] Avg episode rewards: #0: 39.089, true rewards: #0: 15.090 -[2024-07-05 11:12:56,131][25826] Avg episode reward: 39.089, avg true_objective: 15.090 -[2024-07-05 11:12:56,177][25826] Num frames 6100... -[2024-07-05 11:12:56,237][25826] Num frames 6200... -[2024-07-05 11:12:56,299][25826] Num frames 6300... -[2024-07-05 11:12:56,364][25826] Num frames 6400... -[2024-07-05 11:12:56,431][25826] Avg episode rewards: #0: 32.040, true rewards: #0: 12.840 -[2024-07-05 11:12:56,432][25826] Avg episode reward: 32.040, avg true_objective: 12.840 -[2024-07-05 11:12:56,487][25826] Num frames 6500... -[2024-07-05 11:12:56,549][25826] Num frames 6600... -[2024-07-05 11:12:56,611][25826] Num frames 6700... -[2024-07-05 11:12:56,672][25826] Num frames 6800... -[2024-07-05 11:12:56,735][25826] Num frames 6900... -[2024-07-05 11:12:56,797][25826] Num frames 7000... -[2024-07-05 11:12:56,858][25826] Num frames 7100... -[2024-07-05 11:12:56,922][25826] Num frames 7200... -[2024-07-05 11:12:56,984][25826] Num frames 7300... -[2024-07-05 11:12:57,045][25826] Num frames 7400... -[2024-07-05 11:12:57,108][25826] Num frames 7500... -[2024-07-05 11:12:57,172][25826] Num frames 7600... -[2024-07-05 11:12:57,236][25826] Num frames 7700... -[2024-07-05 11:12:57,311][25826] Num frames 7800... -[2024-07-05 11:12:57,378][25826] Num frames 7900... -[2024-07-05 11:12:57,440][25826] Num frames 8000... -[2024-07-05 11:12:57,504][25826] Num frames 8100... -[2024-07-05 11:12:57,580][25826] Num frames 8200... -[2024-07-05 11:12:57,650][25826] Num frames 8300... -[2024-07-05 11:12:57,728][25826] Num frames 8400... -[2024-07-05 11:12:57,821][25826] Num frames 8500... -[2024-07-05 11:12:57,893][25826] Avg episode rewards: #0: 36.199, true rewards: #0: 14.200 -[2024-07-05 11:12:57,894][25826] Avg episode reward: 36.199, avg true_objective: 14.200 -[2024-07-05 11:12:57,947][25826] Num frames 8600... -[2024-07-05 11:12:58,010][25826] Num frames 8700... -[2024-07-05 11:12:58,075][25826] Num frames 8800... -[2024-07-05 11:12:58,139][25826] Num frames 8900... -[2024-07-05 11:12:58,202][25826] Num frames 9000... -[2024-07-05 11:12:58,266][25826] Num frames 9100... -[2024-07-05 11:12:58,329][25826] Num frames 9200... -[2024-07-05 11:12:58,396][25826] Num frames 9300... -[2024-07-05 11:12:58,462][25826] Num frames 9400... -[2024-07-05 11:12:58,529][25826] Num frames 9500... -[2024-07-05 11:12:58,592][25826] Num frames 9600... -[2024-07-05 11:12:58,655][25826] Num frames 9700... -[2024-07-05 11:12:58,720][25826] Num frames 9800... -[2024-07-05 11:12:58,783][25826] Num frames 9900... -[2024-07-05 11:12:58,845][25826] Num frames 10000... -[2024-07-05 11:12:58,910][25826] Num frames 10100... -[2024-07-05 11:12:58,973][25826] Num frames 10200... -[2024-07-05 11:12:59,038][25826] Num frames 10300... -[2024-07-05 11:12:59,102][25826] Num frames 10400... -[2024-07-05 11:12:59,164][25826] Num frames 10500... -[2024-07-05 11:12:59,229][25826] Num frames 10600... -[2024-07-05 11:12:59,295][25826] Avg episode rewards: #0: 39.028, true rewards: #0: 15.171 -[2024-07-05 11:12:59,296][25826] Avg episode reward: 39.028, avg true_objective: 15.171 -[2024-07-05 11:12:59,350][25826] Num frames 10700... -[2024-07-05 11:12:59,415][25826] Num frames 10800... -[2024-07-05 11:12:59,477][25826] Num frames 10900... -[2024-07-05 11:12:59,540][25826] Num frames 11000... -[2024-07-05 11:12:59,602][25826] Num frames 11100... -[2024-07-05 11:12:59,669][25826] Num frames 11200... -[2024-07-05 11:12:59,733][25826] Num frames 11300... -[2024-07-05 11:12:59,798][25826] Num frames 11400... -[2024-07-05 11:12:59,863][25826] Num frames 11500... -[2024-07-05 11:12:59,927][25826] Num frames 11600... -[2024-07-05 11:12:59,990][25826] Num frames 11700... -[2024-07-05 11:13:00,064][25826] Num frames 11800... -[2024-07-05 11:13:00,163][25826] Avg episode rewards: #0: 37.586, true rewards: #0: 14.836 -[2024-07-05 11:13:00,165][25826] Avg episode reward: 37.586, avg true_objective: 14.836 -[2024-07-05 11:13:00,191][25826] Num frames 11900... -[2024-07-05 11:13:00,253][25826] Num frames 12000... -[2024-07-05 11:13:00,316][25826] Num frames 12100... -[2024-07-05 11:13:00,380][25826] Num frames 12200... -[2024-07-05 11:13:00,443][25826] Num frames 12300... -[2024-07-05 11:13:00,505][25826] Num frames 12400... -[2024-07-05 11:13:00,567][25826] Num frames 12500... -[2024-07-05 11:13:00,630][25826] Num frames 12600... -[2024-07-05 11:13:00,693][25826] Num frames 12700... -[2024-07-05 11:13:00,758][25826] Num frames 12800... -[2024-07-05 11:13:00,821][25826] Num frames 12900... -[2024-07-05 11:13:00,883][25826] Num frames 13000... -[2024-07-05 11:13:00,945][25826] Num frames 13100... -[2024-07-05 11:13:01,009][25826] Num frames 13200... -[2024-07-05 11:13:01,072][25826] Num frames 13300... -[2024-07-05 11:13:01,135][25826] Num frames 13400... -[2024-07-05 11:13:01,199][25826] Avg episode rewards: #0: 38.573, true rewards: #0: 14.907 -[2024-07-05 11:13:01,199][25826] Avg episode reward: 38.573, avg true_objective: 14.907 -[2024-07-05 11:13:01,257][25826] Num frames 13500... -[2024-07-05 11:13:01,321][25826] Num frames 13600... -[2024-07-05 11:13:01,389][25826] Num frames 13700... -[2024-07-05 11:13:01,452][25826] Num frames 13800... -[2024-07-05 11:13:01,516][25826] Num frames 13900... -[2024-07-05 11:13:01,580][25826] Num frames 14000... -[2024-07-05 11:13:01,644][25826] Num frames 14100... -[2024-07-05 11:13:01,713][25826] Num frames 14200... -[2024-07-05 11:13:01,777][25826] Num frames 14300... -[2024-07-05 11:13:01,842][25826] Num frames 14400... -[2024-07-05 11:13:01,907][25826] Num frames 14500... -[2024-07-05 11:13:01,970][25826] Num frames 14600... -[2024-07-05 11:13:02,035][25826] Num frames 14700... -[2024-07-05 11:13:02,100][25826] Num frames 14800... -[2024-07-05 11:13:02,168][25826] Num frames 14900... -[2024-07-05 11:13:02,232][25826] Num frames 15000... -[2024-07-05 11:13:02,295][25826] Num frames 15100... -[2024-07-05 11:13:02,358][25826] Num frames 15200... -[2024-07-05 11:13:02,424][25826] Num frames 15300... -[2024-07-05 11:13:02,486][25826] Num frames 15400... -[2024-07-05 11:13:02,547][25826] Num frames 15500... -[2024-07-05 11:13:02,610][25826] Avg episode rewards: #0: 40.615, true rewards: #0: 15.516 -[2024-07-05 11:13:02,611][25826] Avg episode reward: 40.615, avg true_objective: 15.516 -[2024-07-05 11:13:18,867][25826] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 11:17:18,257][25826] Environment doom_basic already registered, overwriting... -[2024-07-05 11:17:18,258][25826] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 11:17:18,259][25826] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 11:17:18,259][25826] Environment doom_dm already registered, overwriting... -[2024-07-05 11:17:18,259][25826] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 11:17:18,259][25826] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 11:17:18,260][25826] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 11:17:18,260][25826] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 11:17:18,260][25826] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 11:17:18,260][25826] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 11:17:18,261][25826] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 11:17:18,261][25826] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 11:17:18,261][25826] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 11:17:18,262][25826] Environment doom_battle already registered, overwriting... -[2024-07-05 11:17:18,262][25826] Environment doom_battle2 already registered, overwriting... -[2024-07-05 11:17:18,262][25826] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 11:17:18,262][25826] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 11:17:18,262][25826] Environment doom_duel already registered, overwriting... -[2024-07-05 11:17:18,263][25826] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 11:17:18,263][25826] Environment doom_benchmark already registered, overwriting... -[2024-07-05 11:17:18,263][25826] register_encoder_factory: -[2024-07-05 11:17:18,268][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:17:18,269][25826] Overriding arg 'train_for_env_steps' with value 50000000 passed from command line -[2024-07-05 11:17:18,273][25826] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 11:17:18,274][25826] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 11:17:18,274][25826] Weights and Biases integration disabled -[2024-07-05 11:17:18,275][25826] Environment var CUDA_VISIBLE_DEVICES is 0 - -[2024-07-05 11:17:20,649][25826] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir -restart_behavior=resume -device=gpu -seed=200 -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=50000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-07-05 11:17:20,650][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 11:17:20,651][25826] Rollout worker 0 uses device cpu -[2024-07-05 11:17:20,651][25826] Rollout worker 1 uses device cpu -[2024-07-05 11:17:20,651][25826] Rollout worker 2 uses device cpu -[2024-07-05 11:17:20,651][25826] Rollout worker 3 uses device cpu -[2024-07-05 11:17:20,652][25826] Rollout worker 4 uses device cpu -[2024-07-05 11:17:20,652][25826] Rollout worker 5 uses device cpu -[2024-07-05 11:17:20,653][25826] Rollout worker 6 uses device cpu -[2024-07-05 11:17:20,653][25826] Rollout worker 7 uses device cpu -[2024-07-05 11:17:20,653][25826] Rollout worker 8 uses device cpu -[2024-07-05 11:17:20,654][25826] Rollout worker 9 uses device cpu -[2024-07-05 11:17:20,654][25826] Rollout worker 10 uses device cpu -[2024-07-05 11:17:20,654][25826] Rollout worker 11 uses device cpu -[2024-07-05 11:17:20,654][25826] Rollout worker 12 uses device cpu -[2024-07-05 11:17:20,655][25826] Rollout worker 13 uses device cpu -[2024-07-05 11:17:20,655][25826] Rollout worker 14 uses device cpu -[2024-07-05 11:17:20,655][25826] Rollout worker 15 uses device cpu -[2024-07-05 11:17:20,739][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:17:20,740][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 11:17:20,788][25826] Starting all processes... -[2024-07-05 11:17:20,789][25826] Starting process learner_proc0 -[2024-07-05 11:17:20,838][25826] Starting all processes... -[2024-07-05 11:17:20,844][25826] Starting process inference_proc0-0 -[2024-07-05 11:17:20,845][25826] Starting process rollout_proc0 -[2024-07-05 11:17:20,845][25826] Starting process rollout_proc1 -[2024-07-05 11:17:20,845][25826] Starting process rollout_proc2 -[2024-07-05 11:17:20,845][25826] Starting process rollout_proc3 -[2024-07-05 11:17:20,846][25826] Starting process rollout_proc4 -[2024-07-05 11:17:20,847][25826] Starting process rollout_proc5 -[2024-07-05 11:17:20,847][25826] Starting process rollout_proc6 -[2024-07-05 11:17:20,847][25826] Starting process rollout_proc7 -[2024-07-05 11:17:20,849][25826] Starting process rollout_proc8 -[2024-07-05 11:17:20,852][25826] Starting process rollout_proc9 -[2024-07-05 11:17:20,852][25826] Starting process rollout_proc10 -[2024-07-05 11:17:20,854][25826] Starting process rollout_proc11 -[2024-07-05 11:17:20,855][25826] Starting process rollout_proc12 -[2024-07-05 11:17:20,860][25826] Starting process rollout_proc13 -[2024-07-05 11:17:20,860][25826] Starting process rollout_proc14 -[2024-07-05 11:17:20,875][25826] Starting process rollout_proc15 -[2024-07-05 11:17:24,781][34139] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:17:24,782][34139] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 11:17:24,860][34148] Worker 8 uses CPU cores [8] -[2024-07-05 11:17:24,880][34141] Worker 1 uses CPU cores [1] -[2024-07-05 11:17:24,883][34146] Worker 6 uses CPU cores [6] -[2024-07-05 11:17:24,888][34165] Worker 12 uses CPU cores [12] -[2024-07-05 11:17:24,907][34139] Num visible devices: 1 -[2024-07-05 11:17:24,983][34169] Worker 13 uses CPU cores [13] -[2024-07-05 11:17:25,052][34144] Worker 4 uses CPU cores [4] -[2024-07-05 11:17:25,088][34142] Worker 3 uses CPU cores [3] -[2024-07-05 11:17:25,101][34140] Worker 0 uses CPU cores [0] -[2024-07-05 11:17:25,147][34119] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:17:25,147][34119] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 11:17:25,200][34119] Num visible devices: 1 -[2024-07-05 11:17:25,229][34119] Setting fixed seed 200 -[2024-07-05 11:17:25,231][34145] Worker 5 uses CPU cores [5] -[2024-07-05 11:17:25,232][34119] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:17:25,232][34119] Initializing actor-critic model on device cuda:0 -[2024-07-05 11:17:25,233][34119] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:17:25,233][34119] RunningMeanStd input shape: (1,) -[2024-07-05 11:17:25,244][34119] ConvEncoder: input_channels=3 -[2024-07-05 11:17:25,276][34170] Worker 15 uses CPU cores [15] -[2024-07-05 11:17:25,319][34119] Conv encoder output size: 512 -[2024-07-05 11:17:25,319][34119] Policy head output size: 512 -[2024-07-05 11:17:25,328][34119] Created Actor Critic model with architecture: -[2024-07-05 11:17:25,328][34119] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 11:17:25,355][34168] Worker 14 uses CPU cores [14] -[2024-07-05 11:17:25,379][34143] Worker 2 uses CPU cores [2] -[2024-07-05 11:17:25,385][34149] Worker 9 uses CPU cores [9] -[2024-07-05 11:17:25,396][34147] Worker 7 uses CPU cores [7] -[2024-07-05 11:17:25,415][34167] Worker 11 uses CPU cores [11] -[2024-07-05 11:17:25,419][34166] Worker 10 uses CPU cores [10] -[2024-07-05 11:17:25,420][34119] Using optimizer -[2024-07-05 11:17:25,991][34119] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006106_30015488.pth... -[2024-07-05 11:17:26,008][34119] Loading model from checkpoint -[2024-07-05 11:17:26,009][34119] Loaded experiment state at self.train_step=6106, self.env_steps=30015488 -[2024-07-05 11:17:26,009][34119] Initialized policy 0 weights for model version 6106 -[2024-07-05 11:17:26,010][34119] LearnerWorker_p0 finished initialization! -[2024-07-05 11:17:26,010][34119] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:17:26,076][34139] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:17:26,077][34139] RunningMeanStd input shape: (1,) -[2024-07-05 11:17:26,084][34139] ConvEncoder: input_channels=3 -[2024-07-05 11:17:26,138][34139] Conv encoder output size: 512 -[2024-07-05 11:17:26,138][34139] Policy head output size: 512 -[2024-07-05 11:17:26,172][25826] Inference worker 0-0 is ready! -[2024-07-05 11:17:26,173][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 11:17:26,221][34141] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,223][34142] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,224][34149] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,227][34147] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,227][34143] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,228][34146] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,228][34169] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,229][34148] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,235][34144] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,236][34140] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,235][34170] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,237][34165] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,237][34168] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,243][34167] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,256][34166] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,259][34145] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:17:26,811][34140] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,811][34168] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,835][34142] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,838][34146] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,838][34144] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,840][34141] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,840][34149] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,840][34148] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,841][34170] Decorrelating experience for 0 frames... -[2024-07-05 11:17:26,970][34140] Decorrelating experience for 32 frames... -[2024-07-05 11:17:26,970][34168] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,000][34149] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,005][34166] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,044][34145] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,048][34146] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,076][34147] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,092][34169] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,093][34165] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,151][34142] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,153][34148] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,165][34166] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,203][34145] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,275][34146] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,295][34147] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,323][34140] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,324][34149] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,348][34143] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,358][34144] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,380][34142] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,393][34141] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,404][34169] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,475][34168] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,487][34166] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,564][34141] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,565][34170] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,570][34143] Decorrelating experience for 32 frames... -[2024-07-05 11:17:27,571][34146] Decorrelating experience for 96 frames... -[2024-07-05 11:17:27,577][34147] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,581][34145] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,624][34142] Decorrelating experience for 96 frames... -[2024-07-05 11:17:27,657][34149] Decorrelating experience for 96 frames... -[2024-07-05 11:17:27,724][34168] Decorrelating experience for 96 frames... -[2024-07-05 11:17:27,778][34169] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,789][34143] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,790][34147] Decorrelating experience for 96 frames... -[2024-07-05 11:17:27,795][34170] Decorrelating experience for 64 frames... -[2024-07-05 11:17:27,821][34145] Decorrelating experience for 96 frames... -[2024-07-05 11:17:27,842][34167] Decorrelating experience for 0 frames... -[2024-07-05 11:17:27,915][34149] Decorrelating experience for 128 frames... -[2024-07-05 11:17:27,972][34140] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,004][34170] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,005][34169] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,008][34167] Decorrelating experience for 32 frames... -[2024-07-05 11:17:28,012][34146] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,083][34165] Decorrelating experience for 32 frames... -[2024-07-05 11:17:28,088][34145] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,131][34147] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,165][34168] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,188][34143] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,203][34166] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,239][34149] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,248][34148] Decorrelating experience for 64 frames... -[2024-07-05 11:17:28,276][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 30015488. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 11:17:28,284][34140] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,304][34145] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,325][34141] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,341][34167] Decorrelating experience for 64 frames... -[2024-07-05 11:17:28,407][34170] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,423][34165] Decorrelating experience for 64 frames... -[2024-07-05 11:17:28,463][34142] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,469][34149] Decorrelating experience for 192 frames... -[2024-07-05 11:17:28,536][34145] Decorrelating experience for 192 frames... -[2024-07-05 11:17:28,537][34147] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,588][34168] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,628][34141] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,658][34165] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,677][34144] Decorrelating experience for 64 frames... -[2024-07-05 11:17:28,696][34169] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,746][34170] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,777][34167] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,813][34166] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,881][34141] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,885][34148] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,903][34144] Decorrelating experience for 96 frames... -[2024-07-05 11:17:28,916][34140] Decorrelating experience for 160 frames... -[2024-07-05 11:17:28,928][34165] Decorrelating experience for 128 frames... -[2024-07-05 11:17:28,998][34149] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,021][34146] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,110][34142] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,112][34145] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,200][34140] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,225][34147] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,304][34148] Decorrelating experience for 128 frames... -[2024-07-05 11:17:29,307][34146] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,392][34166] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,398][34142] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,442][34143] Decorrelating experience for 128 frames... -[2024-07-05 11:17:29,472][34141] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,541][34148] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,568][34147] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,601][34169] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,664][34140] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,693][34165] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,695][34142] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,716][34143] Decorrelating experience for 160 frames... -[2024-07-05 11:17:29,776][34146] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,816][34141] Decorrelating experience for 224 frames... -[2024-07-05 11:17:29,882][34166] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,888][34148] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,894][34169] Decorrelating experience for 192 frames... -[2024-07-05 11:17:29,993][34165] Decorrelating experience for 192 frames... -[2024-07-05 11:17:30,059][34168] Decorrelating experience for 192 frames... -[2024-07-05 11:17:30,142][34170] Decorrelating experience for 192 frames... -[2024-07-05 11:17:30,171][34166] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,186][34148] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,215][34143] Decorrelating experience for 192 frames... -[2024-07-05 11:17:30,290][34165] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,312][34167] Decorrelating experience for 128 frames... -[2024-07-05 11:17:30,401][34168] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,471][34170] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,490][34169] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,538][34143] Decorrelating experience for 224 frames... -[2024-07-05 11:17:30,551][34144] Decorrelating experience for 128 frames... -[2024-07-05 11:17:30,661][34167] Decorrelating experience for 160 frames... -[2024-07-05 11:17:30,789][34144] Decorrelating experience for 160 frames... -[2024-07-05 11:17:30,844][34119] Signal inference workers to stop experience collection... -[2024-07-05 11:17:30,853][34139] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 11:17:31,004][34167] Decorrelating experience for 192 frames... -[2024-07-05 11:17:31,008][34144] Decorrelating experience for 192 frames... -[2024-07-05 11:17:31,216][34167] Decorrelating experience for 224 frames... -[2024-07-05 11:17:31,216][34144] Decorrelating experience for 224 frames... -[2024-07-05 11:17:31,973][34119] Signal inference workers to resume experience collection... -[2024-07-05 11:17:31,974][34139] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 11:17:33,276][25826] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 11468.8). Total num frames: 30072832. Throughput: 0: 1368.0. Samples: 6840. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:17:33,277][25826] Avg episode reward: [(0, '4.339')] -[2024-07-05 11:17:33,726][34139] Updated weights for policy 0, policy_version 6116 (0.0102) -[2024-07-05 11:17:35,537][34139] Updated weights for policy 0, policy_version 6126 (0.0011) -[2024-07-05 11:17:37,280][34139] Updated weights for policy 0, policy_version 6136 (0.0008) -[2024-07-05 11:17:38,276][25826] Fps is (10 sec: 28672.0, 60 sec: 28672.0, 300 sec: 28672.0). Total num frames: 30302208. Throughput: 0: 7476.0. Samples: 74760. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:17:38,277][25826] Avg episode reward: [(0, '40.641')] -[2024-07-05 11:17:38,328][34119] Saving new best policy, reward=40.641! -[2024-07-05 11:17:39,047][34139] Updated weights for policy 0, policy_version 6146 (0.0008) -[2024-07-05 11:17:40,733][25826] Heartbeat connected on Batcher_0 -[2024-07-05 11:17:40,745][25826] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 11:17:40,747][25826] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 11:17:40,750][25826] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 11:17:40,753][25826] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 11:17:40,756][25826] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 11:17:40,759][25826] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 11:17:40,761][25826] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 11:17:40,764][25826] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 11:17:40,765][34139] Updated weights for policy 0, policy_version 6156 (0.0008) -[2024-07-05 11:17:40,768][25826] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 11:17:40,770][25826] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 11:17:40,779][25826] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 11:17:40,781][25826] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 11:17:40,783][25826] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 11:17:40,784][25826] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 11:17:40,786][25826] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 11:17:40,787][25826] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 11:17:40,788][25826] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 11:17:40,791][25826] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 11:17:42,506][34139] Updated weights for policy 0, policy_version 6166 (0.0009) -[2024-07-05 11:17:43,276][25826] Fps is (10 sec: 46694.4, 60 sec: 34952.5, 300 sec: 34952.5). Total num frames: 30539776. Throughput: 0: 7317.1. Samples: 109756. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:17:43,277][25826] Avg episode reward: [(0, '36.904')] -[2024-07-05 11:17:44,246][34139] Updated weights for policy 0, policy_version 6176 (0.0007) -[2024-07-05 11:17:45,920][34139] Updated weights for policy 0, policy_version 6186 (0.0008) -[2024-07-05 11:17:47,617][34139] Updated weights for policy 0, policy_version 6196 (0.0010) -[2024-07-05 11:17:48,276][25826] Fps is (10 sec: 47513.7, 60 sec: 38092.8, 300 sec: 38092.8). Total num frames: 30777344. Throughput: 0: 9098.4. Samples: 181968. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:17:48,276][25826] Avg episode reward: [(0, '34.642')] -[2024-07-05 11:17:49,324][34139] Updated weights for policy 0, policy_version 6206 (0.0009) -[2024-07-05 11:17:50,982][34139] Updated weights for policy 0, policy_version 6216 (0.0007) -[2024-07-05 11:17:52,678][34139] Updated weights for policy 0, policy_version 6226 (0.0007) -[2024-07-05 11:17:53,276][25826] Fps is (10 sec: 48332.5, 60 sec: 40304.5, 300 sec: 40304.5). Total num frames: 31023104. Throughput: 0: 10171.5. Samples: 254288. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:17:53,277][25826] Avg episode reward: [(0, '36.869')] -[2024-07-05 11:17:54,404][34139] Updated weights for policy 0, policy_version 6236 (0.0007) -[2024-07-05 11:17:56,071][34139] Updated weights for policy 0, policy_version 6246 (0.0008) -[2024-07-05 11:17:57,713][34139] Updated weights for policy 0, policy_version 6256 (0.0010) -[2024-07-05 11:17:58,276][25826] Fps is (10 sec: 49151.8, 60 sec: 41779.2, 300 sec: 41779.2). Total num frames: 31268864. Throughput: 0: 9695.3. Samples: 290860. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:17:58,277][25826] Avg episode reward: [(0, '34.750')] -[2024-07-05 11:17:59,448][34139] Updated weights for policy 0, policy_version 6266 (0.0010) -[2024-07-05 11:18:01,106][34139] Updated weights for policy 0, policy_version 6276 (0.0007) -[2024-07-05 11:18:02,807][34139] Updated weights for policy 0, policy_version 6286 (0.0013) -[2024-07-05 11:18:03,276][25826] Fps is (10 sec: 48332.9, 60 sec: 42598.3, 300 sec: 42598.3). Total num frames: 31506432. Throughput: 0: 10379.3. Samples: 363276. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:18:03,277][25826] Avg episode reward: [(0, '33.560')] -[2024-07-05 11:18:04,518][34139] Updated weights for policy 0, policy_version 6296 (0.0007) -[2024-07-05 11:18:06,221][34139] Updated weights for policy 0, policy_version 6306 (0.0008) -[2024-07-05 11:18:07,930][34139] Updated weights for policy 0, policy_version 6316 (0.0007) -[2024-07-05 11:18:08,275][25826] Fps is (10 sec: 48333.0, 60 sec: 43417.6, 300 sec: 43417.6). Total num frames: 31752192. Throughput: 0: 10887.3. Samples: 435492. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:18:08,276][25826] Avg episode reward: [(0, '38.284')] -[2024-07-05 11:18:09,643][34139] Updated weights for policy 0, policy_version 6326 (0.0007) -[2024-07-05 11:18:11,326][34139] Updated weights for policy 0, policy_version 6336 (0.0013) -[2024-07-05 11:18:13,003][34139] Updated weights for policy 0, policy_version 6346 (0.0010) -[2024-07-05 11:18:13,275][25826] Fps is (10 sec: 48333.3, 60 sec: 43872.8, 300 sec: 43872.8). Total num frames: 31989760. Throughput: 0: 10480.8. Samples: 471636. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:18:13,276][25826] Avg episode reward: [(0, '39.657')] -[2024-07-05 11:18:14,714][34139] Updated weights for policy 0, policy_version 6356 (0.0008) -[2024-07-05 11:18:16,437][34139] Updated weights for policy 0, policy_version 6366 (0.0010) -[2024-07-05 11:18:18,146][34139] Updated weights for policy 0, policy_version 6376 (0.0008) -[2024-07-05 11:18:18,276][25826] Fps is (10 sec: 48332.5, 60 sec: 44400.6, 300 sec: 44400.6). Total num frames: 32235520. Throughput: 0: 11942.3. Samples: 544244. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:18:18,277][25826] Avg episode reward: [(0, '36.109')] -[2024-07-05 11:18:19,826][34139] Updated weights for policy 0, policy_version 6386 (0.0007) -[2024-07-05 11:18:21,517][34139] Updated weights for policy 0, policy_version 6396 (0.0011) -[2024-07-05 11:18:23,159][34139] Updated weights for policy 0, policy_version 6406 (0.0008) -[2024-07-05 11:18:23,275][25826] Fps is (10 sec: 48332.8, 60 sec: 44683.7, 300 sec: 44683.7). Total num frames: 32473088. Throughput: 0: 12050.3. Samples: 617024. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:18:23,276][25826] Avg episode reward: [(0, '38.454')] -[2024-07-05 11:18:24,843][34139] Updated weights for policy 0, policy_version 6416 (0.0015) -[2024-07-05 11:18:26,547][34139] Updated weights for policy 0, policy_version 6426 (0.0009) -[2024-07-05 11:18:28,245][34139] Updated weights for policy 0, policy_version 6436 (0.0010) -[2024-07-05 11:18:28,276][25826] Fps is (10 sec: 48332.8, 60 sec: 45056.0, 300 sec: 45056.0). Total num frames: 32718848. Throughput: 0: 12097.5. Samples: 654144. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:18:28,277][25826] Avg episode reward: [(0, '33.176')] -[2024-07-05 11:18:30,023][34139] Updated weights for policy 0, policy_version 6446 (0.0010) -[2024-07-05 11:18:31,756][34139] Updated weights for policy 0, policy_version 6456 (0.0010) -[2024-07-05 11:18:33,276][25826] Fps is (10 sec: 47513.1, 60 sec: 47923.2, 300 sec: 45119.0). Total num frames: 32948224. Throughput: 0: 12050.3. Samples: 724232. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:18:33,277][25826] Avg episode reward: [(0, '35.538')] -[2024-07-05 11:18:33,481][34139] Updated weights for policy 0, policy_version 6466 (0.0008) -[2024-07-05 11:18:35,180][34139] Updated weights for policy 0, policy_version 6476 (0.0010) -[2024-07-05 11:18:36,871][34139] Updated weights for policy 0, policy_version 6486 (0.0010) -[2024-07-05 11:18:38,276][25826] Fps is (10 sec: 46693.5, 60 sec: 48059.6, 300 sec: 45289.9). Total num frames: 33185792. Throughput: 0: 12026.1. Samples: 795464. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:18:38,277][25826] Avg episode reward: [(0, '35.629')] -[2024-07-05 11:18:38,675][34139] Updated weights for policy 0, policy_version 6496 (0.0013) -[2024-07-05 11:18:40,439][34139] Updated weights for policy 0, policy_version 6506 (0.0008) -[2024-07-05 11:18:42,132][34139] Updated weights for policy 0, policy_version 6516 (0.0008) -[2024-07-05 11:18:43,276][25826] Fps is (10 sec: 47513.7, 60 sec: 48059.7, 300 sec: 45438.3). Total num frames: 33423360. Throughput: 0: 12005.2. Samples: 831096. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:18:43,277][25826] Avg episode reward: [(0, '37.717')] -[2024-07-05 11:18:43,878][34139] Updated weights for policy 0, policy_version 6526 (0.0010) -[2024-07-05 11:18:45,587][34139] Updated weights for policy 0, policy_version 6536 (0.0008) -[2024-07-05 11:18:47,371][34139] Updated weights for policy 0, policy_version 6546 (0.0009) -[2024-07-05 11:18:48,276][25826] Fps is (10 sec: 47514.3, 60 sec: 48059.7, 300 sec: 45568.0). Total num frames: 33660928. Throughput: 0: 11971.3. Samples: 901984. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:18:48,277][25826] Avg episode reward: [(0, '35.564')] -[2024-07-05 11:18:49,062][34139] Updated weights for policy 0, policy_version 6556 (0.0008) -[2024-07-05 11:18:50,710][34139] Updated weights for policy 0, policy_version 6566 (0.0008) -[2024-07-05 11:18:52,434][34139] Updated weights for policy 0, policy_version 6576 (0.0010) -[2024-07-05 11:18:53,276][25826] Fps is (10 sec: 47513.1, 60 sec: 47923.2, 300 sec: 45682.4). Total num frames: 33898496. Throughput: 0: 11953.7. Samples: 973412. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:18:53,277][25826] Avg episode reward: [(0, '35.897')] -[2024-07-05 11:18:54,163][34139] Updated weights for policy 0, policy_version 6586 (0.0008) -[2024-07-05 11:18:55,977][34139] Updated weights for policy 0, policy_version 6596 (0.0008) -[2024-07-05 11:18:57,763][34139] Updated weights for policy 0, policy_version 6606 (0.0010) -[2024-07-05 11:18:58,275][25826] Fps is (10 sec: 46695.0, 60 sec: 47650.2, 300 sec: 45693.2). Total num frames: 34127872. Throughput: 0: 11929.2. Samples: 1008452. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:18:58,276][25826] Avg episode reward: [(0, '37.131')] -[2024-07-05 11:18:59,430][34139] Updated weights for policy 0, policy_version 6616 (0.0007) -[2024-07-05 11:19:01,119][34139] Updated weights for policy 0, policy_version 6626 (0.0008) -[2024-07-05 11:19:02,909][34139] Updated weights for policy 0, policy_version 6636 (0.0013) -[2024-07-05 11:19:03,276][25826] Fps is (10 sec: 47513.6, 60 sec: 47786.6, 300 sec: 45875.1). Total num frames: 34373632. Throughput: 0: 11907.8. Samples: 1080096. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:03,277][25826] Avg episode reward: [(0, '36.085')] -[2024-07-05 11:19:04,642][34139] Updated weights for policy 0, policy_version 6646 (0.0012) -[2024-07-05 11:19:06,342][34139] Updated weights for policy 0, policy_version 6656 (0.0007) -[2024-07-05 11:19:07,953][34139] Updated weights for policy 0, policy_version 6666 (0.0007) -[2024-07-05 11:19:08,276][25826] Fps is (10 sec: 49151.3, 60 sec: 47786.6, 300 sec: 46039.0). Total num frames: 34619392. Throughput: 0: 11894.2. Samples: 1152264. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:19:08,277][25826] Avg episode reward: [(0, '38.023')] -[2024-07-05 11:19:09,558][34139] Updated weights for policy 0, policy_version 6676 (0.0008) -[2024-07-05 11:19:11,247][34139] Updated weights for policy 0, policy_version 6686 (0.0008) -[2024-07-05 11:19:12,960][34139] Updated weights for policy 0, policy_version 6696 (0.0007) -[2024-07-05 11:19:13,276][25826] Fps is (10 sec: 48333.3, 60 sec: 47786.6, 300 sec: 46109.3). Total num frames: 34856960. Throughput: 0: 11897.6. Samples: 1189536. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:13,277][25826] Avg episode reward: [(0, '43.087')] -[2024-07-05 11:19:13,309][34119] Saving new best policy, reward=43.087! -[2024-07-05 11:19:14,763][34139] Updated weights for policy 0, policy_version 6706 (0.0010) -[2024-07-05 11:19:16,397][34139] Updated weights for policy 0, policy_version 6716 (0.0007) -[2024-07-05 11:19:18,026][34139] Updated weights for policy 0, policy_version 6726 (0.0008) -[2024-07-05 11:19:18,276][25826] Fps is (10 sec: 48333.2, 60 sec: 47786.7, 300 sec: 46247.6). Total num frames: 35102720. Throughput: 0: 11945.2. Samples: 1261764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:18,276][25826] Avg episode reward: [(0, '38.923')] -[2024-07-05 11:19:18,280][34119] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006727_35102720.pth... -[2024-07-05 11:19:18,345][34119] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000005507_25108480.pth -[2024-07-05 11:19:19,643][34139] Updated weights for policy 0, policy_version 6736 (0.0007) -[2024-07-05 11:19:21,291][34139] Updated weights for policy 0, policy_version 6746 (0.0008) -[2024-07-05 11:19:22,910][34139] Updated weights for policy 0, policy_version 6756 (0.0008) -[2024-07-05 11:19:23,275][25826] Fps is (10 sec: 49971.5, 60 sec: 48059.7, 300 sec: 46445.1). Total num frames: 35356672. Throughput: 0: 12025.9. Samples: 1336624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:23,276][25826] Avg episode reward: [(0, '39.626')] -[2024-07-05 11:19:24,599][34139] Updated weights for policy 0, policy_version 6766 (0.0007) -[2024-07-05 11:19:26,240][34139] Updated weights for policy 0, policy_version 6776 (0.0008) -[2024-07-05 11:19:27,896][34139] Updated weights for policy 0, policy_version 6786 (0.0007) -[2024-07-05 11:19:28,276][25826] Fps is (10 sec: 49971.0, 60 sec: 48059.7, 300 sec: 46557.9). Total num frames: 35602432. Throughput: 0: 12068.2. Samples: 1374164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:28,277][25826] Avg episode reward: [(0, '40.617')] -[2024-07-05 11:19:29,508][34139] Updated weights for policy 0, policy_version 6796 (0.0008) -[2024-07-05 11:19:31,144][34139] Updated weights for policy 0, policy_version 6806 (0.0010) -[2024-07-05 11:19:32,792][34139] Updated weights for policy 0, policy_version 6816 (0.0008) -[2024-07-05 11:19:33,276][25826] Fps is (10 sec: 49970.8, 60 sec: 48469.3, 300 sec: 46727.2). Total num frames: 35856384. Throughput: 0: 12159.8. Samples: 1449176. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:33,277][25826] Avg episode reward: [(0, '39.652')] -[2024-07-05 11:19:34,412][34139] Updated weights for policy 0, policy_version 6826 (0.0008) -[2024-07-05 11:19:36,074][34139] Updated weights for policy 0, policy_version 6836 (0.0007) -[2024-07-05 11:19:37,731][34139] Updated weights for policy 0, policy_version 6846 (0.0008) -[2024-07-05 11:19:38,276][25826] Fps is (10 sec: 49971.3, 60 sec: 48606.0, 300 sec: 46820.4). Total num frames: 36102144. Throughput: 0: 12222.3. Samples: 1523412. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:19:38,276][25826] Avg episode reward: [(0, '38.590')] -[2024-07-05 11:19:39,378][34139] Updated weights for policy 0, policy_version 6856 (0.0007) -[2024-07-05 11:19:41,028][34139] Updated weights for policy 0, policy_version 6866 (0.0008) -[2024-07-05 11:19:42,676][34139] Updated weights for policy 0, policy_version 6876 (0.0007) -[2024-07-05 11:19:43,276][25826] Fps is (10 sec: 49152.0, 60 sec: 48742.4, 300 sec: 46906.8). Total num frames: 36347904. Throughput: 0: 12276.2. Samples: 1560884. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:19:43,276][25826] Avg episode reward: [(0, '37.733')] -[2024-07-05 11:19:44,319][34139] Updated weights for policy 0, policy_version 6886 (0.0008) -[2024-07-05 11:19:45,994][34139] Updated weights for policy 0, policy_version 6896 (0.0007) -[2024-07-05 11:19:47,691][34139] Updated weights for policy 0, policy_version 6906 (0.0007) -[2024-07-05 11:19:48,276][25826] Fps is (10 sec: 49151.9, 60 sec: 48879.0, 300 sec: 46987.0). Total num frames: 36593664. Throughput: 0: 12322.8. Samples: 1634620. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:19:48,276][25826] Avg episode reward: [(0, '37.433')] -[2024-07-05 11:19:49,334][34139] Updated weights for policy 0, policy_version 6916 (0.0008) -[2024-07-05 11:19:50,976][34139] Updated weights for policy 0, policy_version 6926 (0.0008) -[2024-07-05 11:19:52,581][34139] Updated weights for policy 0, policy_version 6936 (0.0007) -[2024-07-05 11:19:53,275][25826] Fps is (10 sec: 49971.4, 60 sec: 49152.1, 300 sec: 47118.1). Total num frames: 36847616. Throughput: 0: 12381.8. Samples: 1709444. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:19:53,277][25826] Avg episode reward: [(0, '42.008')] -[2024-07-05 11:19:54,283][34139] Updated weights for policy 0, policy_version 6946 (0.0011) -[2024-07-05 11:19:55,953][34139] Updated weights for policy 0, policy_version 6956 (0.0010) -[2024-07-05 11:19:57,739][34139] Updated weights for policy 0, policy_version 6966 (0.0010) -[2024-07-05 11:19:58,275][25826] Fps is (10 sec: 48333.4, 60 sec: 49152.0, 300 sec: 47076.7). Total num frames: 37076992. Throughput: 0: 12353.4. Samples: 1745436. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:19:58,277][25826] Avg episode reward: [(0, '39.374')] -[2024-07-05 11:19:59,456][34139] Updated weights for policy 0, policy_version 6976 (0.0008) -[2024-07-05 11:20:01,161][34139] Updated weights for policy 0, policy_version 6986 (0.0010) -[2024-07-05 11:20:02,866][34139] Updated weights for policy 0, policy_version 6996 (0.0007) -[2024-07-05 11:20:03,276][25826] Fps is (10 sec: 47510.9, 60 sec: 49151.6, 300 sec: 47143.5). Total num frames: 37322752. Throughput: 0: 12340.3. Samples: 1817084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:20:03,278][25826] Avg episode reward: [(0, '39.495')] -[2024-07-05 11:20:04,533][34139] Updated weights for policy 0, policy_version 7006 (0.0007) -[2024-07-05 11:20:06,211][34139] Updated weights for policy 0, policy_version 7016 (0.0008) -[2024-07-05 11:20:07,882][34139] Updated weights for policy 0, policy_version 7026 (0.0007) -[2024-07-05 11:20:08,276][25826] Fps is (10 sec: 49151.4, 60 sec: 49152.0, 300 sec: 47206.4). Total num frames: 37568512. Throughput: 0: 12302.7. Samples: 1890248. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:20:08,276][25826] Avg episode reward: [(0, '42.406')] -[2024-07-05 11:20:09,543][34139] Updated weights for policy 0, policy_version 7036 (0.0009) -[2024-07-05 11:20:11,210][34139] Updated weights for policy 0, policy_version 7046 (0.0010) -[2024-07-05 11:20:12,954][34139] Updated weights for policy 0, policy_version 7056 (0.0007) -[2024-07-05 11:20:13,276][25826] Fps is (10 sec: 48335.5, 60 sec: 49152.0, 300 sec: 47215.7). Total num frames: 37806080. Throughput: 0: 12286.9. Samples: 1927076. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:20:13,276][25826] Avg episode reward: [(0, '37.298')] -[2024-07-05 11:20:14,604][34139] Updated weights for policy 0, policy_version 7066 (0.0009) -[2024-07-05 11:20:16,250][34139] Updated weights for policy 0, policy_version 7076 (0.0007) -[2024-07-05 11:20:17,934][34139] Updated weights for policy 0, policy_version 7086 (0.0010) -[2024-07-05 11:20:18,276][25826] Fps is (10 sec: 49152.0, 60 sec: 49288.5, 300 sec: 47320.8). Total num frames: 38060032. Throughput: 0: 12249.5. Samples: 2000404. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:20:18,277][25826] Avg episode reward: [(0, '36.096')] -[2024-07-05 11:20:19,596][34139] Updated weights for policy 0, policy_version 7096 (0.0010) -[2024-07-05 11:20:21,310][34139] Updated weights for policy 0, policy_version 7106 (0.0016) -[2024-07-05 11:20:23,051][34139] Updated weights for policy 0, policy_version 7116 (0.0007) -[2024-07-05 11:20:23,277][25826] Fps is (10 sec: 49146.5, 60 sec: 49014.5, 300 sec: 47326.1). Total num frames: 38297600. Throughput: 0: 12210.1. Samples: 2072880. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:20:23,278][25826] Avg episode reward: [(0, '38.490')] -[2024-07-05 11:20:24,722][34139] Updated weights for policy 0, policy_version 7126 (0.0008) -[2024-07-05 11:20:26,510][34139] Updated weights for policy 0, policy_version 7136 (0.0010) -[2024-07-05 11:20:28,256][34139] Updated weights for policy 0, policy_version 7146 (0.0007) -[2024-07-05 11:20:28,276][25826] Fps is (10 sec: 47510.5, 60 sec: 48878.4, 300 sec: 47331.4). Total num frames: 38535168. Throughput: 0: 12166.5. Samples: 2108384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:20:28,278][25826] Avg episode reward: [(0, '40.671')] -[2024-07-05 11:20:29,928][34139] Updated weights for policy 0, policy_version 7156 (0.0007) -[2024-07-05 11:20:31,666][34139] Updated weights for policy 0, policy_version 7166 (0.0008) -[2024-07-05 11:20:33,275][25826] Fps is (10 sec: 47519.1, 60 sec: 48605.9, 300 sec: 47336.5). Total num frames: 38772736. Throughput: 0: 12108.8. Samples: 2179516. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:20:33,276][25826] Avg episode reward: [(0, '41.610')] -[2024-07-05 11:20:33,365][34139] Updated weights for policy 0, policy_version 7176 (0.0007) -[2024-07-05 11:20:35,047][34139] Updated weights for policy 0, policy_version 7186 (0.0009) -[2024-07-05 11:20:36,733][34139] Updated weights for policy 0, policy_version 7196 (0.0007) -[2024-07-05 11:20:38,276][25826] Fps is (10 sec: 48332.7, 60 sec: 48605.3, 300 sec: 47384.1). Total num frames: 39018496. Throughput: 0: 12067.6. Samples: 2252496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:20:38,278][25826] Avg episode reward: [(0, '42.037')] -[2024-07-05 11:20:38,438][34139] Updated weights for policy 0, policy_version 7206 (0.0010) -[2024-07-05 11:20:40,108][34139] Updated weights for policy 0, policy_version 7216 (0.0008) -[2024-07-05 11:20:41,772][34139] Updated weights for policy 0, policy_version 7226 (0.0008) -[2024-07-05 11:20:43,275][25826] Fps is (10 sec: 49152.0, 60 sec: 48605.9, 300 sec: 47429.6). Total num frames: 39264256. Throughput: 0: 12079.1. Samples: 2288996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:20:43,277][25826] Avg episode reward: [(0, '40.136')] -[2024-07-05 11:20:43,409][34139] Updated weights for policy 0, policy_version 7236 (0.0008) -[2024-07-05 11:20:45,089][34139] Updated weights for policy 0, policy_version 7246 (0.0008) -[2024-07-05 11:20:46,694][34139] Updated weights for policy 0, policy_version 7256 (0.0010) -[2024-07-05 11:20:48,276][25826] Fps is (10 sec: 49153.2, 60 sec: 48605.5, 300 sec: 47472.5). Total num frames: 39510016. Throughput: 0: 12144.6. Samples: 2363592. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:20:48,278][25826] Avg episode reward: [(0, '39.793')] -[2024-07-05 11:20:48,337][34139] Updated weights for policy 0, policy_version 7266 (0.0007) -[2024-07-05 11:20:50,077][34139] Updated weights for policy 0, policy_version 7276 (0.0007) -[2024-07-05 11:20:51,766][34139] Updated weights for policy 0, policy_version 7286 (0.0007) -[2024-07-05 11:20:53,276][25826] Fps is (10 sec: 49151.9, 60 sec: 48469.3, 300 sec: 47513.6). Total num frames: 39755776. Throughput: 0: 12150.3. Samples: 2437012. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:20:53,277][25826] Avg episode reward: [(0, '37.615')] -[2024-07-05 11:20:53,437][34139] Updated weights for policy 0, policy_version 7296 (0.0010) -[2024-07-05 11:20:55,093][34139] Updated weights for policy 0, policy_version 7306 (0.0007) -[2024-07-05 11:20:56,734][34139] Updated weights for policy 0, policy_version 7316 (0.0007) -[2024-07-05 11:20:58,276][25826] Fps is (10 sec: 49154.1, 60 sec: 48742.3, 300 sec: 47552.6). Total num frames: 40001536. Throughput: 0: 12152.3. Samples: 2473928. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:20:58,277][25826] Avg episode reward: [(0, '42.275')] -[2024-07-05 11:20:58,461][34139] Updated weights for policy 0, policy_version 7326 (0.0008) -[2024-07-05 11:21:00,085][34139] Updated weights for policy 0, policy_version 7336 (0.0008) -[2024-07-05 11:21:01,733][34139] Updated weights for policy 0, policy_version 7346 (0.0007) -[2024-07-05 11:21:03,276][25826] Fps is (10 sec: 48330.9, 60 sec: 48606.0, 300 sec: 47551.6). Total num frames: 40239104. Throughput: 0: 12142.1. Samples: 2546804. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:21:03,278][25826] Avg episode reward: [(0, '39.843')] -[2024-07-05 11:21:03,502][34139] Updated weights for policy 0, policy_version 7356 (0.0012) -[2024-07-05 11:21:05,239][34139] Updated weights for policy 0, policy_version 7366 (0.0008) -[2024-07-05 11:21:06,992][34139] Updated weights for policy 0, policy_version 7376 (0.0009) -[2024-07-05 11:21:08,275][25826] Fps is (10 sec: 47513.8, 60 sec: 48469.4, 300 sec: 47550.8). Total num frames: 40476672. Throughput: 0: 12125.0. Samples: 2618492. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:21:08,277][25826] Avg episode reward: [(0, '42.822')] -[2024-07-05 11:21:08,622][34139] Updated weights for policy 0, policy_version 7386 (0.0008) -[2024-07-05 11:21:10,260][34139] Updated weights for policy 0, policy_version 7396 (0.0009) -[2024-07-05 11:21:11,882][34139] Updated weights for policy 0, policy_version 7406 (0.0008) -[2024-07-05 11:21:13,275][25826] Fps is (10 sec: 49154.0, 60 sec: 48742.4, 300 sec: 47622.8). Total num frames: 40730624. Throughput: 0: 12178.5. Samples: 2656408. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:21:13,277][25826] Avg episode reward: [(0, '42.259')] -[2024-07-05 11:21:13,533][34139] Updated weights for policy 0, policy_version 7416 (0.0008) -[2024-07-05 11:21:15,167][34139] Updated weights for policy 0, policy_version 7426 (0.0008) -[2024-07-05 11:21:16,766][34139] Updated weights for policy 0, policy_version 7436 (0.0008) -[2024-07-05 11:21:18,276][25826] Fps is (10 sec: 50790.1, 60 sec: 48742.4, 300 sec: 47691.7). Total num frames: 40984576. Throughput: 0: 12268.1. Samples: 2731580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:21:18,277][25826] Avg episode reward: [(0, '40.849')] -[2024-07-05 11:21:18,281][34119] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000007445_40984576.pth... -[2024-07-05 11:21:18,357][34119] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006106_30015488.pth -[2024-07-05 11:21:18,414][34139] Updated weights for policy 0, policy_version 7446 (0.0011) -[2024-07-05 11:21:20,036][34139] Updated weights for policy 0, policy_version 7456 (0.0008) -[2024-07-05 11:21:21,655][34139] Updated weights for policy 0, policy_version 7466 (0.0007) -[2024-07-05 11:21:23,276][25826] Fps is (10 sec: 49971.0, 60 sec: 48879.8, 300 sec: 47722.8). Total num frames: 41230336. Throughput: 0: 12313.2. Samples: 2806580. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:21:23,277][25826] Avg episode reward: [(0, '39.290')] -[2024-07-05 11:21:23,338][34139] Updated weights for policy 0, policy_version 7476 (0.0010) -[2024-07-05 11:21:25,013][34139] Updated weights for policy 0, policy_version 7486 (0.0008) -[2024-07-05 11:21:26,706][34139] Updated weights for policy 0, policy_version 7496 (0.0007) -[2024-07-05 11:21:28,275][25826] Fps is (10 sec: 49152.6, 60 sec: 49016.1, 300 sec: 47752.5). Total num frames: 41476096. Throughput: 0: 12325.1. Samples: 2843624. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:21:28,277][25826] Avg episode reward: [(0, '38.764')] -[2024-07-05 11:21:28,319][34139] Updated weights for policy 0, policy_version 7506 (0.0008) -[2024-07-05 11:21:29,967][34139] Updated weights for policy 0, policy_version 7516 (0.0007) -[2024-07-05 11:21:31,573][34139] Updated weights for policy 0, policy_version 7526 (0.0010) -[2024-07-05 11:21:33,171][34139] Updated weights for policy 0, policy_version 7536 (0.0010) -[2024-07-05 11:21:33,275][25826] Fps is (10 sec: 49971.5, 60 sec: 49288.5, 300 sec: 47814.5). Total num frames: 41730048. Throughput: 0: 12326.0. Samples: 2918256. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:21:33,277][25826] Avg episode reward: [(0, '41.126')] -[2024-07-05 11:21:34,944][34139] Updated weights for policy 0, policy_version 7546 (0.0011) -[2024-07-05 11:21:36,614][34139] Updated weights for policy 0, policy_version 7556 (0.0008) -[2024-07-05 11:21:38,276][25826] Fps is (10 sec: 49147.3, 60 sec: 49151.9, 300 sec: 47808.3). Total num frames: 41967616. Throughput: 0: 12319.2. Samples: 2991388. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:21:38,279][25826] Avg episode reward: [(0, '36.638')] -[2024-07-05 11:21:38,280][34139] Updated weights for policy 0, policy_version 7566 (0.0007) -[2024-07-05 11:21:39,961][34139] Updated weights for policy 0, policy_version 7576 (0.0010) -[2024-07-05 11:21:41,669][34139] Updated weights for policy 0, policy_version 7586 (0.0008) -[2024-07-05 11:21:43,276][25826] Fps is (10 sec: 48332.6, 60 sec: 49152.0, 300 sec: 47834.9). Total num frames: 42213376. Throughput: 0: 12304.3. Samples: 3027620. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:21:43,277][25826] Avg episode reward: [(0, '38.600')] -[2024-07-05 11:21:43,319][34139] Updated weights for policy 0, policy_version 7596 (0.0010) -[2024-07-05 11:21:45,020][34139] Updated weights for policy 0, policy_version 7606 (0.0008) -[2024-07-05 11:21:46,697][34139] Updated weights for policy 0, policy_version 7616 (0.0009) -[2024-07-05 11:21:48,276][25826] Fps is (10 sec: 49156.4, 60 sec: 49152.4, 300 sec: 47860.2). Total num frames: 42459136. Throughput: 0: 12323.9. Samples: 3101376. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:21:48,276][25826] Avg episode reward: [(0, '38.955')] -[2024-07-05 11:21:48,308][34139] Updated weights for policy 0, policy_version 7626 (0.0008) -[2024-07-05 11:21:50,009][34139] Updated weights for policy 0, policy_version 7636 (0.0007) -[2024-07-05 11:21:51,699][34139] Updated weights for policy 0, policy_version 7646 (0.0010) -[2024-07-05 11:21:53,276][25826] Fps is (10 sec: 49152.1, 60 sec: 49152.0, 300 sec: 47884.6). Total num frames: 42704896. Throughput: 0: 12368.8. Samples: 3175088. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:21:53,277][25826] Avg episode reward: [(0, '42.187')] -[2024-07-05 11:21:53,335][34139] Updated weights for policy 0, policy_version 7656 (0.0008) -[2024-07-05 11:21:54,995][34139] Updated weights for policy 0, policy_version 7666 (0.0008) -[2024-07-05 11:21:56,677][34139] Updated weights for policy 0, policy_version 7676 (0.0007) -[2024-07-05 11:21:58,275][25826] Fps is (10 sec: 49152.3, 60 sec: 49152.1, 300 sec: 47908.0). Total num frames: 42950656. Throughput: 0: 12338.4. Samples: 3211636. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:21:58,277][25826] Avg episode reward: [(0, '40.367')] -[2024-07-05 11:21:58,356][34139] Updated weights for policy 0, policy_version 7686 (0.0008) -[2024-07-05 11:22:00,031][34139] Updated weights for policy 0, policy_version 7696 (0.0008) -[2024-07-05 11:22:01,734][34139] Updated weights for policy 0, policy_version 7706 (0.0009) -[2024-07-05 11:22:03,276][25826] Fps is (10 sec: 49151.9, 60 sec: 49288.9, 300 sec: 47930.6). Total num frames: 43196416. Throughput: 0: 12297.0. Samples: 3284944. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:22:03,277][25826] Avg episode reward: [(0, '38.774')] -[2024-07-05 11:22:03,392][34139] Updated weights for policy 0, policy_version 7716 (0.0008) -[2024-07-05 11:22:05,050][34139] Updated weights for policy 0, policy_version 7726 (0.0007) -[2024-07-05 11:22:06,852][34139] Updated weights for policy 0, policy_version 7736 (0.0007) -[2024-07-05 11:22:08,275][25826] Fps is (10 sec: 48332.6, 60 sec: 49288.5, 300 sec: 47923.2). Total num frames: 43433984. Throughput: 0: 12245.3. Samples: 3357616. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:22:08,276][25826] Avg episode reward: [(0, '39.546')] -[2024-07-05 11:22:08,546][34139] Updated weights for policy 0, policy_version 7746 (0.0008) -[2024-07-05 11:22:10,174][34139] Updated weights for policy 0, policy_version 7756 (0.0007) -[2024-07-05 11:22:11,791][34139] Updated weights for policy 0, policy_version 7766 (0.0008) -[2024-07-05 11:22:13,276][25826] Fps is (10 sec: 48330.0, 60 sec: 49151.5, 300 sec: 47944.7). Total num frames: 43679744. Throughput: 0: 12240.4. Samples: 3394448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:22:13,277][25826] Avg episode reward: [(0, '39.117')] -[2024-07-05 11:22:13,433][34139] Updated weights for policy 0, policy_version 7776 (0.0009) -[2024-07-05 11:22:15,025][34139] Updated weights for policy 0, policy_version 7786 (0.0007) -[2024-07-05 11:22:16,678][34139] Updated weights for policy 0, policy_version 7796 (0.0008) -[2024-07-05 11:22:18,276][25826] Fps is (10 sec: 49970.9, 60 sec: 49152.0, 300 sec: 47993.8). Total num frames: 43933696. Throughput: 0: 12257.8. Samples: 3469856. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:22:18,277][25826] Avg episode reward: [(0, '42.312')] -[2024-07-05 11:22:18,284][34139] Updated weights for policy 0, policy_version 7806 (0.0011) -[2024-07-05 11:22:19,926][34139] Updated weights for policy 0, policy_version 7816 (0.0007) -[2024-07-05 11:22:21,599][34139] Updated weights for policy 0, policy_version 7826 (0.0010) -[2024-07-05 11:22:23,254][34139] Updated weights for policy 0, policy_version 7836 (0.0010) -[2024-07-05 11:22:23,276][25826] Fps is (10 sec: 50793.4, 60 sec: 49288.6, 300 sec: 48041.2). Total num frames: 44187648. Throughput: 0: 12306.7. Samples: 3545180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:22:23,276][25826] Avg episode reward: [(0, '37.970')] -[2024-07-05 11:22:24,881][34139] Updated weights for policy 0, policy_version 7846 (0.0008) -[2024-07-05 11:22:26,507][34139] Updated weights for policy 0, policy_version 7856 (0.0007) -[2024-07-05 11:22:28,147][34139] Updated weights for policy 0, policy_version 7866 (0.0008) -[2024-07-05 11:22:28,276][25826] Fps is (10 sec: 49971.3, 60 sec: 49288.5, 300 sec: 48679.9). Total num frames: 44433408. Throughput: 0: 12335.6. Samples: 3582724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:22:28,276][25826] Avg episode reward: [(0, '44.221')] -[2024-07-05 11:22:28,315][34119] Saving new best policy, reward=44.221! -[2024-07-05 11:22:29,794][34139] Updated weights for policy 0, policy_version 7876 (0.0011) -[2024-07-05 11:22:31,455][34139] Updated weights for policy 0, policy_version 7886 (0.0007) -[2024-07-05 11:22:33,091][34139] Updated weights for policy 0, policy_version 7896 (0.0007) -[2024-07-05 11:22:33,275][25826] Fps is (10 sec: 49152.2, 60 sec: 49152.0, 300 sec: 48735.5). Total num frames: 44679168. Throughput: 0: 12336.1. Samples: 3656500. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:22:33,277][25826] Avg episode reward: [(0, '42.789')] -[2024-07-05 11:22:34,786][34139] Updated weights for policy 0, policy_version 7906 (0.0010) -[2024-07-05 11:22:36,416][34139] Updated weights for policy 0, policy_version 7916 (0.0007) -[2024-07-05 11:22:38,056][34139] Updated weights for policy 0, policy_version 7926 (0.0009) -[2024-07-05 11:22:38,276][25826] Fps is (10 sec: 49971.1, 60 sec: 49425.8, 300 sec: 48791.0). Total num frames: 44933120. Throughput: 0: 12371.1. Samples: 3731788. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:22:38,277][25826] Avg episode reward: [(0, '40.745')] -[2024-07-05 11:22:39,666][34139] Updated weights for policy 0, policy_version 7936 (0.0008) -[2024-07-05 11:22:41,333][34139] Updated weights for policy 0, policy_version 7946 (0.0008) -[2024-07-05 11:22:42,971][34139] Updated weights for policy 0, policy_version 7956 (0.0007) -[2024-07-05 11:22:43,276][25826] Fps is (10 sec: 49970.9, 60 sec: 49425.0, 300 sec: 48818.8). Total num frames: 45178880. Throughput: 0: 12393.1. Samples: 3769328. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:22:43,276][25826] Avg episode reward: [(0, '43.134')] -[2024-07-05 11:22:44,585][34139] Updated weights for policy 0, policy_version 7966 (0.0008) -[2024-07-05 11:22:46,203][34139] Updated weights for policy 0, policy_version 7976 (0.0007) -[2024-07-05 11:22:47,890][34139] Updated weights for policy 0, policy_version 7986 (0.0007) -[2024-07-05 11:22:48,275][25826] Fps is (10 sec: 49971.5, 60 sec: 49561.6, 300 sec: 48846.6). Total num frames: 45432832. Throughput: 0: 12425.8. Samples: 3844104. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:22:48,276][25826] Avg episode reward: [(0, '42.353')] -[2024-07-05 11:22:49,585][34139] Updated weights for policy 0, policy_version 7996 (0.0008) -[2024-07-05 11:22:51,352][34139] Updated weights for policy 0, policy_version 8006 (0.0007) -[2024-07-05 11:22:53,056][34139] Updated weights for policy 0, policy_version 8016 (0.0007) -[2024-07-05 11:22:53,275][25826] Fps is (10 sec: 49152.6, 60 sec: 49425.1, 300 sec: 48818.8). Total num frames: 45670400. Throughput: 0: 12405.2. Samples: 3915848. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:22:53,276][25826] Avg episode reward: [(0, '40.998')] -[2024-07-05 11:22:54,796][34139] Updated weights for policy 0, policy_version 8026 (0.0008) -[2024-07-05 11:22:56,425][34139] Updated weights for policy 0, policy_version 8036 (0.0012) -[2024-07-05 11:22:58,112][34139] Updated weights for policy 0, policy_version 8046 (0.0007) -[2024-07-05 11:22:58,276][25826] Fps is (10 sec: 48332.6, 60 sec: 49425.0, 300 sec: 48846.5). Total num frames: 45916160. Throughput: 0: 12401.4. Samples: 3952504. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:22:58,277][25826] Avg episode reward: [(0, '40.198')] -[2024-07-05 11:22:59,708][34139] Updated weights for policy 0, policy_version 8056 (0.0008) -[2024-07-05 11:23:01,421][34139] Updated weights for policy 0, policy_version 8066 (0.0010) -[2024-07-05 11:23:03,137][34139] Updated weights for policy 0, policy_version 8076 (0.0008) -[2024-07-05 11:23:03,276][25826] Fps is (10 sec: 48332.0, 60 sec: 49288.5, 300 sec: 48818.7). Total num frames: 46153728. Throughput: 0: 12371.7. Samples: 4026584. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:23:03,277][25826] Avg episode reward: [(0, '38.134')] -[2024-07-05 11:23:04,767][34139] Updated weights for policy 0, policy_version 8086 (0.0008) -[2024-07-05 11:23:06,517][34139] Updated weights for policy 0, policy_version 8096 (0.0008) -[2024-07-05 11:23:08,190][34139] Updated weights for policy 0, policy_version 8106 (0.0008) -[2024-07-05 11:23:08,276][25826] Fps is (10 sec: 48331.7, 60 sec: 49424.9, 300 sec: 48846.5). Total num frames: 46399488. Throughput: 0: 12290.6. Samples: 4098260. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:23:08,277][25826] Avg episode reward: [(0, '41.508')] -[2024-07-05 11:23:09,890][34139] Updated weights for policy 0, policy_version 8116 (0.0010) -[2024-07-05 11:23:11,532][34139] Updated weights for policy 0, policy_version 8126 (0.0008) -[2024-07-05 11:23:13,201][34139] Updated weights for policy 0, policy_version 8136 (0.0010) -[2024-07-05 11:23:13,275][25826] Fps is (10 sec: 49152.7, 60 sec: 49425.6, 300 sec: 48846.6). Total num frames: 46645248. Throughput: 0: 12273.3. Samples: 4135020. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:23:13,276][25826] Avg episode reward: [(0, '39.410')] -[2024-07-05 11:23:14,874][34139] Updated weights for policy 0, policy_version 8146 (0.0008) -[2024-07-05 11:23:16,584][34139] Updated weights for policy 0, policy_version 8156 (0.0008) -[2024-07-05 11:23:18,276][25826] Fps is (10 sec: 48333.9, 60 sec: 49152.0, 300 sec: 48846.5). Total num frames: 46882816. Throughput: 0: 12245.1. Samples: 4207528. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:23:18,277][25826] Avg episode reward: [(0, '42.544')] -[2024-07-05 11:23:18,315][34119] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008166_46891008.pth... -[2024-07-05 11:23:18,321][34139] Updated weights for policy 0, policy_version 8166 (0.0008) -[2024-07-05 11:23:18,380][34119] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000006727_35102720.pth -[2024-07-05 11:23:20,084][34139] Updated weights for policy 0, policy_version 8176 (0.0008) -[2024-07-05 11:23:21,825][34139] Updated weights for policy 0, policy_version 8186 (0.0009) -[2024-07-05 11:23:23,275][25826] Fps is (10 sec: 47513.9, 60 sec: 48879.0, 300 sec: 48818.8). Total num frames: 47120384. Throughput: 0: 12155.6. Samples: 4278788. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:23:23,276][25826] Avg episode reward: [(0, '39.461')] -[2024-07-05 11:23:23,546][34139] Updated weights for policy 0, policy_version 8196 (0.0011) -[2024-07-05 11:23:25,211][34139] Updated weights for policy 0, policy_version 8206 (0.0008) -[2024-07-05 11:23:26,878][34139] Updated weights for policy 0, policy_version 8216 (0.0008) -[2024-07-05 11:23:28,276][25826] Fps is (10 sec: 48332.9, 60 sec: 48878.9, 300 sec: 48874.3). Total num frames: 47366144. Throughput: 0: 12131.5. Samples: 4315244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:23:28,277][25826] Avg episode reward: [(0, '43.466')] -[2024-07-05 11:23:28,564][34139] Updated weights for policy 0, policy_version 8226 (0.0007) -[2024-07-05 11:23:30,276][34139] Updated weights for policy 0, policy_version 8236 (0.0008) -[2024-07-05 11:23:31,988][34139] Updated weights for policy 0, policy_version 8246 (0.0008) -[2024-07-05 11:23:33,276][25826] Fps is (10 sec: 48332.1, 60 sec: 48742.4, 300 sec: 48874.3). Total num frames: 47603712. Throughput: 0: 12090.1. Samples: 4388160. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:23:33,277][25826] Avg episode reward: [(0, '39.738')] -[2024-07-05 11:23:33,631][34139] Updated weights for policy 0, policy_version 8256 (0.0008) -[2024-07-05 11:23:35,226][34139] Updated weights for policy 0, policy_version 8266 (0.0007) -[2024-07-05 11:23:36,901][34139] Updated weights for policy 0, policy_version 8276 (0.0008) -[2024-07-05 11:23:38,276][25826] Fps is (10 sec: 49151.9, 60 sec: 48742.4, 300 sec: 48929.8). Total num frames: 47857664. Throughput: 0: 12146.6. Samples: 4462448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:23:38,277][25826] Avg episode reward: [(0, '41.304')] -[2024-07-05 11:23:38,578][34139] Updated weights for policy 0, policy_version 8286 (0.0007) -[2024-07-05 11:23:40,204][34139] Updated weights for policy 0, policy_version 8296 (0.0008) -[2024-07-05 11:23:41,855][34139] Updated weights for policy 0, policy_version 8306 (0.0008) -[2024-07-05 11:23:43,276][25826] Fps is (10 sec: 49971.3, 60 sec: 48742.4, 300 sec: 48957.6). Total num frames: 48103424. Throughput: 0: 12154.4. Samples: 4499452. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:23:43,277][25826] Avg episode reward: [(0, '42.313')] -[2024-07-05 11:23:43,582][34139] Updated weights for policy 0, policy_version 8316 (0.0008) -[2024-07-05 11:23:45,266][34139] Updated weights for policy 0, policy_version 8326 (0.0007) -[2024-07-05 11:23:46,949][34139] Updated weights for policy 0, policy_version 8336 (0.0007) -[2024-07-05 11:23:48,275][25826] Fps is (10 sec: 49152.3, 60 sec: 48605.9, 300 sec: 48985.4). Total num frames: 48349184. Throughput: 0: 12145.8. Samples: 4573144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:23:48,277][25826] Avg episode reward: [(0, '42.888')] -[2024-07-05 11:23:48,594][34139] Updated weights for policy 0, policy_version 8346 (0.0009) -[2024-07-05 11:23:50,289][34139] Updated weights for policy 0, policy_version 8356 (0.0007) -[2024-07-05 11:23:51,878][34139] Updated weights for policy 0, policy_version 8366 (0.0008) -[2024-07-05 11:23:53,276][25826] Fps is (10 sec: 49152.1, 60 sec: 48742.3, 300 sec: 49040.9). Total num frames: 48594944. Throughput: 0: 12198.5. Samples: 4647188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:23:53,277][25826] Avg episode reward: [(0, '42.817')] -[2024-07-05 11:23:53,536][34139] Updated weights for policy 0, policy_version 8376 (0.0008) -[2024-07-05 11:23:55,182][34139] Updated weights for policy 0, policy_version 8386 (0.0007) -[2024-07-05 11:23:56,795][34139] Updated weights for policy 0, policy_version 8396 (0.0008) -[2024-07-05 11:23:58,276][25826] Fps is (10 sec: 49151.1, 60 sec: 48742.3, 300 sec: 49040.9). Total num frames: 48840704. Throughput: 0: 12209.9. Samples: 4684468. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:23:58,276][25826] Avg episode reward: [(0, '44.842')] -[2024-07-05 11:23:58,320][34119] Saving new best policy, reward=44.842! -[2024-07-05 11:23:58,454][34139] Updated weights for policy 0, policy_version 8406 (0.0007) -[2024-07-05 11:24:00,090][34139] Updated weights for policy 0, policy_version 8416 (0.0008) -[2024-07-05 11:24:01,758][34139] Updated weights for policy 0, policy_version 8426 (0.0007) -[2024-07-05 11:24:03,275][25826] Fps is (10 sec: 49971.6, 60 sec: 49015.6, 300 sec: 49068.7). Total num frames: 49094656. Throughput: 0: 12270.5. Samples: 4759700. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:24:03,276][25826] Avg episode reward: [(0, '40.153')] -[2024-07-05 11:24:03,353][34139] Updated weights for policy 0, policy_version 8436 (0.0009) -[2024-07-05 11:24:04,989][34139] Updated weights for policy 0, policy_version 8446 (0.0008) -[2024-07-05 11:24:06,587][34139] Updated weights for policy 0, policy_version 8456 (0.0008) -[2024-07-05 11:24:08,248][34139] Updated weights for policy 0, policy_version 8466 (0.0008) -[2024-07-05 11:24:08,276][25826] Fps is (10 sec: 50791.1, 60 sec: 49152.2, 300 sec: 49124.2). Total num frames: 49348608. Throughput: 0: 12352.1. Samples: 4834636. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:24:08,276][25826] Avg episode reward: [(0, '43.179')] -[2024-07-05 11:24:09,988][34139] Updated weights for policy 0, policy_version 8476 (0.0007) -[2024-07-05 11:24:11,692][34139] Updated weights for policy 0, policy_version 8486 (0.0008) -[2024-07-05 11:24:13,276][25826] Fps is (10 sec: 49151.7, 60 sec: 49015.4, 300 sec: 49096.5). Total num frames: 49586176. Throughput: 0: 12344.6. Samples: 4870752. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:24:13,277][25826] Avg episode reward: [(0, '40.811')] -[2024-07-05 11:24:13,347][34139] Updated weights for policy 0, policy_version 8496 (0.0007) -[2024-07-05 11:24:14,960][34139] Updated weights for policy 0, policy_version 8506 (0.0007) -[2024-07-05 11:24:16,591][34139] Updated weights for policy 0, policy_version 8516 (0.0008) -[2024-07-05 11:24:18,234][34139] Updated weights for policy 0, policy_version 8526 (0.0010) -[2024-07-05 11:24:18,275][25826] Fps is (10 sec: 49152.1, 60 sec: 49288.6, 300 sec: 49096.5). Total num frames: 49840128. Throughput: 0: 12382.1. Samples: 4945352. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:24:18,277][25826] Avg episode reward: [(0, '41.759')] -[2024-07-05 11:24:19,902][34139] Updated weights for policy 0, policy_version 8536 (0.0007) -[2024-07-05 11:24:21,531][34139] Updated weights for policy 0, policy_version 8546 (0.0009) -[2024-07-05 11:24:21,716][34119] Stopping Batcher_0... -[2024-07-05 11:24:21,716][34119] Loop batcher_evt_loop terminating... -[2024-07-05 11:24:21,716][25826] Component Batcher_0 stopped! -[2024-07-05 11:24:21,726][34119] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008547_50012160.pth... -[2024-07-05 11:24:21,757][34139] Weights refcount: 2 0 -[2024-07-05 11:24:21,782][34145] Stopping RolloutWorker_w5... -[2024-07-05 11:24:21,782][34149] Stopping RolloutWorker_w9... -[2024-07-05 11:24:21,782][34140] Stopping RolloutWorker_w0... -[2024-07-05 11:24:21,782][34145] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 11:24:21,782][34140] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 11:24:21,782][25826] Component RolloutWorker_w5 stopped! -[2024-07-05 11:24:21,783][34149] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 11:24:21,783][34165] Stopping RolloutWorker_w12... -[2024-07-05 11:24:21,783][25826] Component RolloutWorker_w9 stopped! -[2024-07-05 11:24:21,784][34165] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 11:24:21,784][34169] Stopping RolloutWorker_w13... -[2024-07-05 11:24:21,784][34146] Stopping RolloutWorker_w6... -[2024-07-05 11:24:21,784][34148] Stopping RolloutWorker_w8... -[2024-07-05 11:24:21,784][34166] Stopping RolloutWorker_w10... -[2024-07-05 11:24:21,784][34142] Stopping RolloutWorker_w3... -[2024-07-05 11:24:21,784][34168] Stopping RolloutWorker_w14... -[2024-07-05 11:24:21,784][34144] Stopping RolloutWorker_w4... -[2024-07-05 11:24:21,784][34147] Stopping RolloutWorker_w7... -[2024-07-05 11:24:21,784][25826] Component RolloutWorker_w0 stopped! -[2024-07-05 11:24:21,784][34167] Stopping RolloutWorker_w11... -[2024-07-05 11:24:21,784][34169] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 11:24:21,784][34166] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 11:24:21,784][34146] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 11:24:21,784][34142] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 11:24:21,784][34170] Stopping RolloutWorker_w15... -[2024-07-05 11:24:21,785][34168] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 11:24:21,785][34144] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 11:24:21,785][34167] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 11:24:21,785][34170] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 11:24:21,785][34141] Stopping RolloutWorker_w1... -[2024-07-05 11:24:21,785][25826] Component RolloutWorker_w12 stopped! -[2024-07-05 11:24:21,785][34148] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 11:24:21,785][34147] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 11:24:21,785][34141] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 11:24:21,785][25826] Component RolloutWorker_w13 stopped! -[2024-07-05 11:24:21,786][25826] Component RolloutWorker_w6 stopped! -[2024-07-05 11:24:21,787][25826] Component RolloutWorker_w10 stopped! -[2024-07-05 11:24:21,787][34119] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000007445_40984576.pth -[2024-07-05 11:24:21,787][25826] Component RolloutWorker_w8 stopped! -[2024-07-05 11:24:21,788][25826] Component RolloutWorker_w3 stopped! -[2024-07-05 11:24:21,789][25826] Component RolloutWorker_w7 stopped! -[2024-07-05 11:24:21,790][25826] Component RolloutWorker_w14 stopped! -[2024-07-05 11:24:21,790][34139] Stopping InferenceWorker_p0-w0... -[2024-07-05 11:24:21,791][34139] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 11:24:21,790][25826] Component RolloutWorker_w4 stopped! -[2024-07-05 11:24:21,791][25826] Component RolloutWorker_w11 stopped! -[2024-07-05 11:24:21,791][25826] Component RolloutWorker_w15 stopped! -[2024-07-05 11:24:21,792][25826] Component RolloutWorker_w1 stopped! -[2024-07-05 11:24:21,792][25826] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 11:24:21,796][34119] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008547_50012160.pth... -[2024-07-05 11:24:21,812][34143] Stopping RolloutWorker_w2... -[2024-07-05 11:24:21,812][34143] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 11:24:21,812][25826] Component RolloutWorker_w2 stopped! -[2024-07-05 11:24:21,901][25826] Component LearnerWorker_p0 stopped! -[2024-07-05 11:24:21,902][25826] Waiting for process learner_proc0 to stop... -[2024-07-05 11:24:21,901][34119] Stopping LearnerWorker_p0... -[2024-07-05 11:24:21,905][34119] Loop learner_proc0_evt_loop terminating... -[2024-07-05 11:24:23,273][25826] Waiting for process inference_proc0-0 to join... -[2024-07-05 11:24:23,274][25826] Waiting for process rollout_proc0 to join... -[2024-07-05 11:24:23,274][25826] Waiting for process rollout_proc1 to join... -[2024-07-05 11:24:23,275][25826] Waiting for process rollout_proc2 to join... -[2024-07-05 11:24:23,275][25826] Waiting for process rollout_proc3 to join... -[2024-07-05 11:24:23,275][25826] Waiting for process rollout_proc4 to join... -[2024-07-05 11:24:23,275][25826] Waiting for process rollout_proc5 to join... -[2024-07-05 11:24:23,276][25826] Waiting for process rollout_proc6 to join... -[2024-07-05 11:24:23,276][25826] Waiting for process rollout_proc7 to join... -[2024-07-05 11:24:23,276][25826] Waiting for process rollout_proc8 to join... -[2024-07-05 11:24:23,276][25826] Waiting for process rollout_proc9 to join... -[2024-07-05 11:24:23,276][25826] Waiting for process rollout_proc10 to join... -[2024-07-05 11:24:23,277][25826] Waiting for process rollout_proc11 to join... -[2024-07-05 11:24:23,277][25826] Waiting for process rollout_proc12 to join... -[2024-07-05 11:24:23,277][25826] Waiting for process rollout_proc13 to join... -[2024-07-05 11:24:23,277][25826] Waiting for process rollout_proc14 to join... -[2024-07-05 11:24:23,277][25826] Waiting for process rollout_proc15 to join... -[2024-07-05 11:24:23,278][25826] Batcher 0 profile tree view: -batching: 29.3748, releasing_batches: 0.0533 -[2024-07-05 11:24:23,278][25826] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0000 - wait_policy_total: 22.7316 -update_model: 6.2589 - weight_update: 0.0009 -one_step: 0.0043 - handle_policy_step: 372.7003 - deserialize: 28.1308, stack: 2.0092, obs_to_device_normalize: 89.0804, forward: 174.5551, send_messages: 18.6752 - prepare_outputs: 48.0542 - to_cpu: 29.3971 -[2024-07-05 11:24:23,278][25826] Learner 0 profile tree view: -misc: 0.0093, prepare_batch: 40.1743 -train: 87.3712 - epoch_init: 0.0076, minibatch_init: 0.0109, losses_postprocess: 0.5073, kl_divergence: 0.5443, after_optimizer: 0.6351 - calculate_losses: 30.8131 - losses_init: 0.0046, forward_head: 1.3814, bptt_initial: 24.2246, tail: 1.0678, advantages_returns: 0.2913, losses: 1.6990 - bptt: 1.8201 - bptt_forward_core: 1.7337 - update: 54.0816 - clip: 1.6085 -[2024-07-05 11:24:23,278][25826] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.1761, enqueue_policy_requests: 12.2728, env_step: 192.4280, overhead: 19.1979, complete_rollouts: 0.4131 -save_policy_outputs: 14.6346 - split_output_tensors: 6.8221 -[2024-07-05 11:24:23,278][25826] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.1914, enqueue_policy_requests: 12.6909, env_step: 197.9647, overhead: 20.1909, complete_rollouts: 0.3917 -save_policy_outputs: 14.7956 - split_output_tensors: 6.8693 -[2024-07-05 11:24:23,279][25826] Loop Runner_EvtLoop terminating... -[2024-07-05 11:24:23,279][25826] Runner profile tree view: -main_loop: 422.4908 -[2024-07-05 11:24:23,279][25826] Collected {0: 50012160}, FPS: 47330.4 -[2024-07-05 11:25:03,678][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:25:03,679][25826] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 11:25:03,679][25826] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 11:25:03,679][25826] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 11:25:03,680][25826] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:25:03,680][25826] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 11:25:03,680][25826] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:25:03,680][25826] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 11:25:03,681][25826] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 11:25:03,681][25826] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 11:25:03,682][25826] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 11:25:03,682][25826] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 11:25:03,682][25826] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 11:25:03,682][25826] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 11:25:03,683][25826] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 11:25:03,703][25826] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:25:03,705][25826] RunningMeanStd input shape: (1,) -[2024-07-05 11:25:03,720][25826] ConvEncoder: input_channels=3 -[2024-07-05 11:25:03,757][25826] Conv encoder output size: 512 -[2024-07-05 11:25:03,758][25826] Policy head output size: 512 -[2024-07-05 11:25:03,787][25826] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008547_50012160.pth... -[2024-07-05 11:25:04,440][25826] Num frames 100... -[2024-07-05 11:25:04,502][25826] Num frames 200... -[2024-07-05 11:25:04,560][25826] Num frames 300... -[2024-07-05 11:25:04,619][25826] Num frames 400... -[2024-07-05 11:25:04,678][25826] Num frames 500... -[2024-07-05 11:25:04,739][25826] Num frames 600... -[2024-07-05 11:25:04,798][25826] Num frames 700... -[2024-07-05 11:25:04,854][25826] Num frames 800... -[2024-07-05 11:25:04,913][25826] Num frames 900... -[2024-07-05 11:25:04,969][25826] Num frames 1000... -[2024-07-05 11:25:05,032][25826] Num frames 1100... -[2024-07-05 11:25:05,094][25826] Num frames 1200... -[2024-07-05 11:25:05,152][25826] Num frames 1300... -[2024-07-05 11:25:05,209][25826] Num frames 1400... -[2024-07-05 11:25:05,268][25826] Num frames 1500... -[2024-07-05 11:25:05,328][25826] Num frames 1600... -[2024-07-05 11:25:05,386][25826] Num frames 1700... -[2024-07-05 11:25:05,445][25826] Num frames 1800... -[2024-07-05 11:25:05,505][25826] Num frames 1900... -[2024-07-05 11:25:05,564][25826] Num frames 2000... -[2024-07-05 11:25:05,625][25826] Num frames 2100... -[2024-07-05 11:25:05,676][25826] Avg episode rewards: #0: 61.999, true rewards: #0: 21.000 -[2024-07-05 11:25:05,678][25826] Avg episode reward: 61.999, avg true_objective: 21.000 -[2024-07-05 11:25:05,742][25826] Num frames 2200... -[2024-07-05 11:25:05,801][25826] Num frames 2300... -[2024-07-05 11:25:05,861][25826] Num frames 2400... -[2024-07-05 11:25:05,920][25826] Num frames 2500... -[2024-07-05 11:25:05,983][25826] Num frames 2600... -[2024-07-05 11:25:06,044][25826] Num frames 2700... -[2024-07-05 11:25:06,104][25826] Num frames 2800... -[2024-07-05 11:25:06,166][25826] Num frames 2900... -[2024-07-05 11:25:06,231][25826] Num frames 3000... -[2024-07-05 11:25:06,293][25826] Num frames 3100... -[2024-07-05 11:25:06,352][25826] Num frames 3200... -[2024-07-05 11:25:06,412][25826] Num frames 3300... -[2024-07-05 11:25:06,472][25826] Num frames 3400... -[2024-07-05 11:25:06,535][25826] Num frames 3500... -[2024-07-05 11:25:06,625][25826] Avg episode rewards: #0: 49.794, true rewards: #0: 17.795 -[2024-07-05 11:25:06,627][25826] Avg episode reward: 49.794, avg true_objective: 17.795 -[2024-07-05 11:25:06,656][25826] Num frames 3600... -[2024-07-05 11:25:06,717][25826] Num frames 3700... -[2024-07-05 11:25:06,776][25826] Num frames 3800... -[2024-07-05 11:25:06,835][25826] Num frames 3900... -[2024-07-05 11:25:06,893][25826] Num frames 4000... -[2024-07-05 11:25:06,971][25826] Avg episode rewards: #0: 35.796, true rewards: #0: 13.463 -[2024-07-05 11:25:06,973][25826] Avg episode reward: 35.796, avg true_objective: 13.463 -[2024-07-05 11:25:07,025][25826] Num frames 4100... -[2024-07-05 11:25:07,092][25826] Num frames 4200... -[2024-07-05 11:25:07,153][25826] Num frames 4300... -[2024-07-05 11:25:07,218][25826] Num frames 4400... -[2024-07-05 11:25:07,278][25826] Num frames 4500... -[2024-07-05 11:25:07,338][25826] Num frames 4600... -[2024-07-05 11:25:07,396][25826] Num frames 4700... -[2024-07-05 11:25:07,456][25826] Num frames 4800... -[2024-07-05 11:25:07,514][25826] Num frames 4900... -[2024-07-05 11:25:07,572][25826] Num frames 5000... -[2024-07-05 11:25:07,631][25826] Num frames 5100... -[2024-07-05 11:25:07,692][25826] Num frames 5200... -[2024-07-05 11:25:07,752][25826] Num frames 5300... -[2024-07-05 11:25:07,814][25826] Num frames 5400... -[2024-07-05 11:25:07,874][25826] Num frames 5500... -[2024-07-05 11:25:07,935][25826] Num frames 5600... -[2024-07-05 11:25:07,995][25826] Num frames 5700... -[2024-07-05 11:25:08,055][25826] Num frames 5800... -[2024-07-05 11:25:08,117][25826] Num frames 5900... -[2024-07-05 11:25:08,178][25826] Num frames 6000... -[2024-07-05 11:25:08,237][25826] Num frames 6100... -[2024-07-05 11:25:08,316][25826] Avg episode rewards: #0: 41.347, true rewards: #0: 15.348 -[2024-07-05 11:25:08,318][25826] Avg episode reward: 41.347, avg true_objective: 15.348 -[2024-07-05 11:25:08,359][25826] Num frames 6200... -[2024-07-05 11:25:08,418][25826] Num frames 6300... -[2024-07-05 11:25:08,478][25826] Num frames 6400... -[2024-07-05 11:25:08,537][25826] Num frames 6500... -[2024-07-05 11:25:08,596][25826] Num frames 6600... -[2024-07-05 11:25:08,655][25826] Num frames 6700... -[2024-07-05 11:25:08,715][25826] Num frames 6800... -[2024-07-05 11:25:08,775][25826] Num frames 6900... -[2024-07-05 11:25:08,834][25826] Num frames 7000... -[2024-07-05 11:25:08,895][25826] Num frames 7100... -[2024-07-05 11:25:08,956][25826] Num frames 7200... -[2024-07-05 11:25:09,018][25826] Num frames 7300... -[2024-07-05 11:25:09,077][25826] Num frames 7400... -[2024-07-05 11:25:09,137][25826] Num frames 7500... -[2024-07-05 11:25:09,197][25826] Num frames 7600... -[2024-07-05 11:25:09,257][25826] Num frames 7700... -[2024-07-05 11:25:09,367][25826] Avg episode rewards: #0: 40.987, true rewards: #0: 15.588 -[2024-07-05 11:25:09,369][25826] Avg episode reward: 40.987, avg true_objective: 15.588 -[2024-07-05 11:25:09,374][25826] Num frames 7800... -[2024-07-05 11:25:09,435][25826] Num frames 7900... -[2024-07-05 11:25:09,494][25826] Num frames 8000... -[2024-07-05 11:25:09,553][25826] Num frames 8100... -[2024-07-05 11:25:09,612][25826] Num frames 8200... -[2024-07-05 11:25:09,671][25826] Num frames 8300... -[2024-07-05 11:25:09,730][25826] Num frames 8400... -[2024-07-05 11:25:09,788][25826] Num frames 8500... -[2024-07-05 11:25:09,846][25826] Num frames 8600... -[2024-07-05 11:25:09,912][25826] Num frames 8700... -[2024-07-05 11:25:09,973][25826] Num frames 8800... -[2024-07-05 11:25:10,033][25826] Num frames 8900... -[2024-07-05 11:25:10,092][25826] Num frames 9000... -[2024-07-05 11:25:10,152][25826] Avg episode rewards: #0: 39.681, true rewards: #0: 15.015 -[2024-07-05 11:25:10,153][25826] Avg episode reward: 39.681, avg true_objective: 15.015 -[2024-07-05 11:25:10,211][25826] Num frames 9100... -[2024-07-05 11:25:10,270][25826] Num frames 9200... -[2024-07-05 11:25:10,330][25826] Num frames 9300... -[2024-07-05 11:25:10,390][25826] Num frames 9400... -[2024-07-05 11:25:10,449][25826] Num frames 9500... -[2024-07-05 11:25:10,513][25826] Num frames 9600... -[2024-07-05 11:25:10,575][25826] Num frames 9700... -[2024-07-05 11:25:10,637][25826] Num frames 9800... -[2024-07-05 11:25:10,701][25826] Num frames 9900... -[2024-07-05 11:25:10,765][25826] Num frames 10000... -[2024-07-05 11:25:10,830][25826] Num frames 10100... -[2024-07-05 11:25:10,895][25826] Num frames 10200... -[2024-07-05 11:25:10,957][25826] Num frames 10300... -[2024-07-05 11:25:11,018][25826] Num frames 10400... -[2024-07-05 11:25:11,079][25826] Num frames 10500... -[2024-07-05 11:25:11,139][25826] Num frames 10600... -[2024-07-05 11:25:11,202][25826] Num frames 10700... -[2024-07-05 11:25:11,267][25826] Num frames 10800... -[2024-07-05 11:25:11,329][25826] Num frames 10900... -[2024-07-05 11:25:11,393][25826] Num frames 11000... -[2024-07-05 11:25:11,453][25826] Num frames 11100... -[2024-07-05 11:25:11,513][25826] Avg episode rewards: #0: 42.298, true rewards: #0: 15.870 -[2024-07-05 11:25:11,514][25826] Avg episode reward: 42.298, avg true_objective: 15.870 -[2024-07-05 11:25:11,574][25826] Num frames 11200... -[2024-07-05 11:25:11,636][25826] Num frames 11300... -[2024-07-05 11:25:11,695][25826] Num frames 11400... -[2024-07-05 11:25:11,757][25826] Num frames 11500... -[2024-07-05 11:25:11,817][25826] Num frames 11600... -[2024-07-05 11:25:11,876][25826] Num frames 11700... -[2024-07-05 11:25:11,937][25826] Num frames 11800... -[2024-07-05 11:25:11,998][25826] Num frames 11900... -[2024-07-05 11:25:12,057][25826] Num frames 12000... -[2024-07-05 11:25:12,118][25826] Num frames 12100... -[2024-07-05 11:25:12,230][25826] Avg episode rewards: #0: 40.246, true rewards: #0: 15.246 -[2024-07-05 11:25:12,232][25826] Avg episode reward: 40.246, avg true_objective: 15.246 -[2024-07-05 11:25:12,234][25826] Num frames 12200... -[2024-07-05 11:25:12,297][25826] Num frames 12300... -[2024-07-05 11:25:12,357][25826] Num frames 12400... -[2024-07-05 11:25:12,420][25826] Num frames 12500... -[2024-07-05 11:25:12,481][25826] Num frames 12600... -[2024-07-05 11:25:12,542][25826] Num frames 12700... -[2024-07-05 11:25:12,602][25826] Num frames 12800... -[2024-07-05 11:25:12,662][25826] Num frames 12900... -[2024-07-05 11:25:12,722][25826] Num frames 13000... -[2024-07-05 11:25:12,784][25826] Num frames 13100... -[2024-07-05 11:25:12,846][25826] Num frames 13200... -[2024-07-05 11:25:12,907][25826] Num frames 13300... -[2024-07-05 11:25:12,973][25826] Num frames 13400... -[2024-07-05 11:25:13,032][25826] Num frames 13500... -[2024-07-05 11:25:13,100][25826] Num frames 13600... -[2024-07-05 11:25:13,161][25826] Num frames 13700... -[2024-07-05 11:25:13,234][25826] Num frames 13800... -[2024-07-05 11:25:13,294][25826] Num frames 13900... -[2024-07-05 11:25:13,365][25826] Num frames 14000... -[2024-07-05 11:25:13,424][25826] Num frames 14100... -[2024-07-05 11:25:13,484][25826] Num frames 14200... -[2024-07-05 11:25:13,594][25826] Avg episode rewards: #0: 42.774, true rewards: #0: 15.886 -[2024-07-05 11:25:13,595][25826] Avg episode reward: 42.774, avg true_objective: 15.886 -[2024-07-05 11:25:13,598][25826] Num frames 14300... -[2024-07-05 11:25:13,660][25826] Num frames 14400... -[2024-07-05 11:25:13,718][25826] Num frames 14500... -[2024-07-05 11:25:13,778][25826] Num frames 14600... -[2024-07-05 11:25:13,838][25826] Num frames 14700... -[2024-07-05 11:25:13,898][25826] Num frames 14800... -[2024-07-05 11:25:13,958][25826] Num frames 14900... -[2024-07-05 11:25:14,018][25826] Num frames 15000... -[2024-07-05 11:25:14,077][25826] Num frames 15100... -[2024-07-05 11:25:14,146][25826] Num frames 15200... -[2024-07-05 11:25:14,204][25826] Num frames 15300... -[2024-07-05 11:25:14,263][25826] Num frames 15400... -[2024-07-05 11:25:14,325][25826] Num frames 15500... -[2024-07-05 11:25:14,385][25826] Num frames 15600... -[2024-07-05 11:25:14,447][25826] Num frames 15700... -[2024-07-05 11:25:14,508][25826] Num frames 15800... -[2024-07-05 11:25:14,572][25826] Num frames 15900... -[2024-07-05 11:25:14,661][25826] Avg episode rewards: #0: 43.353, true rewards: #0: 15.954 -[2024-07-05 11:25:14,662][25826] Avg episode reward: 43.353, avg true_objective: 15.954 -[2024-07-05 11:25:31,220][25826] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 11:29:09,666][25826] Environment doom_basic already registered, overwriting... -[2024-07-05 11:29:09,667][25826] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 11:29:09,667][25826] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 11:29:09,668][25826] Environment doom_dm already registered, overwriting... -[2024-07-05 11:29:09,668][25826] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 11:29:09,668][25826] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 11:29:09,668][25826] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 11:29:09,669][25826] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 11:29:09,669][25826] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 11:29:09,669][25826] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 11:29:09,669][25826] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 11:29:09,670][25826] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 11:29:09,670][25826] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 11:29:09,670][25826] Environment doom_battle already registered, overwriting... -[2024-07-05 11:29:09,670][25826] Environment doom_battle2 already registered, overwriting... -[2024-07-05 11:29:09,671][25826] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 11:29:09,671][25826] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 11:29:09,671][25826] Environment doom_duel already registered, overwriting... -[2024-07-05 11:29:09,671][25826] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 11:29:09,672][25826] Environment doom_benchmark already registered, overwriting... -[2024-07-05 11:29:09,672][25826] register_encoder_factory: -[2024-07-05 11:29:09,678][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:29:09,678][25826] Overriding arg 'train_for_env_steps' with value 70000000 passed from command line -[2024-07-05 11:29:09,682][25826] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 11:29:09,683][25826] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 11:29:09,683][25826] Weights and Biases integration disabled -[2024-07-05 11:29:09,684][25826] Environment var CUDA_VISIBLE_DEVICES is 0 - -[2024-07-05 11:29:12,602][25826] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir -restart_behavior=resume -device=gpu -seed=200 -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=70000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-07-05 11:29:12,603][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 11:29:12,604][25826] Rollout worker 0 uses device cpu -[2024-07-05 11:29:12,604][25826] Rollout worker 1 uses device cpu -[2024-07-05 11:29:12,604][25826] Rollout worker 2 uses device cpu -[2024-07-05 11:29:12,605][25826] Rollout worker 3 uses device cpu -[2024-07-05 11:29:12,605][25826] Rollout worker 4 uses device cpu -[2024-07-05 11:29:12,605][25826] Rollout worker 5 uses device cpu -[2024-07-05 11:29:12,605][25826] Rollout worker 6 uses device cpu -[2024-07-05 11:29:12,606][25826] Rollout worker 7 uses device cpu -[2024-07-05 11:29:12,606][25826] Rollout worker 8 uses device cpu -[2024-07-05 11:29:12,606][25826] Rollout worker 9 uses device cpu -[2024-07-05 11:29:12,606][25826] Rollout worker 10 uses device cpu -[2024-07-05 11:29:12,606][25826] Rollout worker 11 uses device cpu -[2024-07-05 11:29:12,607][25826] Rollout worker 12 uses device cpu -[2024-07-05 11:29:12,607][25826] Rollout worker 13 uses device cpu -[2024-07-05 11:29:12,607][25826] Rollout worker 14 uses device cpu -[2024-07-05 11:29:12,607][25826] Rollout worker 15 uses device cpu -[2024-07-05 11:29:12,697][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:29:12,698][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 11:29:12,748][25826] Starting all processes... -[2024-07-05 11:29:12,749][25826] Starting process learner_proc0 -[2024-07-05 11:29:12,798][25826] Starting all processes... -[2024-07-05 11:29:12,801][25826] Starting process inference_proc0-0 -[2024-07-05 11:29:12,802][25826] Starting process rollout_proc0 -[2024-07-05 11:29:12,802][25826] Starting process rollout_proc1 -[2024-07-05 11:29:12,803][25826] Starting process rollout_proc2 -[2024-07-05 11:29:12,803][25826] Starting process rollout_proc3 -[2024-07-05 11:29:12,803][25826] Starting process rollout_proc4 -[2024-07-05 11:29:12,803][25826] Starting process rollout_proc5 -[2024-07-05 11:29:12,804][25826] Starting process rollout_proc6 -[2024-07-05 11:29:12,804][25826] Starting process rollout_proc7 -[2024-07-05 11:29:12,805][25826] Starting process rollout_proc8 -[2024-07-05 11:29:12,805][25826] Starting process rollout_proc9 -[2024-07-05 11:29:12,806][25826] Starting process rollout_proc10 -[2024-07-05 11:29:12,807][25826] Starting process rollout_proc11 -[2024-07-05 11:29:12,807][25826] Starting process rollout_proc12 -[2024-07-05 11:29:12,809][25826] Starting process rollout_proc13 -[2024-07-05 11:29:12,812][25826] Starting process rollout_proc14 -[2024-07-05 11:29:12,832][25826] Starting process rollout_proc15 -[2024-07-05 11:29:16,644][38723] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:29:16,644][38723] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 11:29:16,834][38727] Worker 2 uses CPU cores [2] -[2024-07-05 11:29:16,840][38723] Num visible devices: 1 -[2024-07-05 11:29:16,860][38748] Worker 8 uses CPU cores [8] -[2024-07-05 11:29:16,876][38726] Worker 3 uses CPU cores [3] -[2024-07-05 11:29:16,910][38703] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:29:16,911][38703] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 11:29:16,944][38729] Worker 6 uses CPU cores [6] -[2024-07-05 11:29:17,019][38703] Num visible devices: 1 -[2024-07-05 11:29:17,024][38752] Worker 12 uses CPU cores [12] -[2024-07-05 11:29:17,028][38751] Worker 13 uses CPU cores [13] -[2024-07-05 11:29:17,057][38703] Setting fixed seed 200 -[2024-07-05 11:29:17,067][38703] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:29:17,068][38703] Initializing actor-critic model on device cuda:0 -[2024-07-05 11:29:17,068][38703] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:29:17,071][38703] RunningMeanStd input shape: (1,) -[2024-07-05 11:29:17,081][38703] ConvEncoder: input_channels=3 -[2024-07-05 11:29:17,108][38732] Worker 9 uses CPU cores [9] -[2024-07-05 11:29:17,128][38728] Worker 4 uses CPU cores [4] -[2024-07-05 11:29:17,162][38703] Conv encoder output size: 512 -[2024-07-05 11:29:17,162][38703] Policy head output size: 512 -[2024-07-05 11:29:17,164][38731] Worker 7 uses CPU cores [7] -[2024-07-05 11:29:17,171][38703] Created Actor Critic model with architecture: -[2024-07-05 11:29:17,171][38703] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 11:29:17,172][38730] Worker 5 uses CPU cores [5] -[2024-07-05 11:29:17,188][38749] Worker 10 uses CPU cores [10] -[2024-07-05 11:29:17,226][38724] Worker 0 uses CPU cores [0] -[2024-07-05 11:29:17,256][38750] Worker 11 uses CPU cores [11] -[2024-07-05 11:29:17,298][38703] Using optimizer -[2024-07-05 11:29:17,348][38753] Worker 15 uses CPU cores [15] -[2024-07-05 11:29:17,366][38754] Worker 14 uses CPU cores [14] -[2024-07-05 11:29:17,372][38725] Worker 1 uses CPU cores [1] -[2024-07-05 11:29:17,940][38703] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008547_50012160.pth... -[2024-07-05 11:29:17,959][38703] Loading model from checkpoint -[2024-07-05 11:29:17,960][38703] Loaded experiment state at self.train_step=8547, self.env_steps=50012160 -[2024-07-05 11:29:17,961][38703] Initialized policy 0 weights for model version 8547 -[2024-07-05 11:29:17,962][38703] LearnerWorker_p0 finished initialization! -[2024-07-05 11:29:17,962][38703] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:29:18,027][38723] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:29:18,028][38723] RunningMeanStd input shape: (1,) -[2024-07-05 11:29:18,035][38723] ConvEncoder: input_channels=3 -[2024-07-05 11:29:18,090][38723] Conv encoder output size: 512 -[2024-07-05 11:29:18,090][38723] Policy head output size: 512 -[2024-07-05 11:29:18,122][25826] Inference worker 0-0 is ready! -[2024-07-05 11:29:18,123][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 11:29:18,172][38725] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,172][38750] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,173][38753] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,173][38730] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,175][38724] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,175][38732] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,176][38748] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,176][38726] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,177][38749] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,180][38728] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,187][38731] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,190][38754] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,192][38727] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,191][38752] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,201][38729] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,199][38751] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:29:18,792][38725] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,793][38730] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,794][38753] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,797][38749] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,797][38750] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,797][38732] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,798][38748] Decorrelating experience for 0 frames... -[2024-07-05 11:29:18,958][38725] Decorrelating experience for 32 frames... -[2024-07-05 11:29:18,958][38753] Decorrelating experience for 32 frames... -[2024-07-05 11:29:18,996][38731] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,008][38751] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,021][38749] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,021][38750] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,021][38748] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,021][38732] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,033][38728] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,060][38730] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,122][38724] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,201][38750] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,203][38728] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,221][38751] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,243][38754] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,244][38752] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,257][38732] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,257][38748] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,276][38753] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,283][38726] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,401][38752] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,430][38728] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,436][38729] Decorrelating experience for 0 frames... -[2024-07-05 11:29:19,436][38724] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,437][38750] Decorrelating experience for 96 frames... -[2024-07-05 11:29:19,437][38726] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,440][38730] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,624][38749] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,649][38729] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,665][38754] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,665][38753] Decorrelating experience for 96 frames... -[2024-07-05 11:29:19,666][38731] Decorrelating experience for 32 frames... -[2024-07-05 11:29:19,673][38728] Decorrelating experience for 96 frames... -[2024-07-05 11:29:19,684][38730] Decorrelating experience for 96 frames... -[2024-07-05 11:29:19,685][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 50012160. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 11:29:19,844][38725] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,855][38732] Decorrelating experience for 96 frames... -[2024-07-05 11:29:19,857][38751] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,857][38749] Decorrelating experience for 96 frames... -[2024-07-05 11:29:19,890][38731] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,901][38754] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,939][38729] Decorrelating experience for 64 frames... -[2024-07-05 11:29:19,992][38753] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,049][38751] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,080][38724] Decorrelating experience for 64 frames... -[2024-07-05 11:29:20,092][38725] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,097][38727] Decorrelating experience for 0 frames... -[2024-07-05 11:29:20,102][38728] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,119][38726] Decorrelating experience for 64 frames... -[2024-07-05 11:29:20,169][38749] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,172][38730] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,263][38748] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,266][38729] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,298][38754] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,303][38724] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,357][38753] Decorrelating experience for 160 frames... -[2024-07-05 11:29:20,357][38731] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,384][38749] Decorrelating experience for 160 frames... -[2024-07-05 11:29:20,397][38752] Decorrelating experience for 64 frames... -[2024-07-05 11:29:20,399][38727] Decorrelating experience for 32 frames... -[2024-07-05 11:29:20,480][38728] Decorrelating experience for 160 frames... -[2024-07-05 11:29:20,522][38726] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,552][38750] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,571][38748] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,584][38752] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,610][38729] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,639][38727] Decorrelating experience for 64 frames... -[2024-07-05 11:29:20,651][38753] Decorrelating experience for 192 frames... -[2024-07-05 11:29:20,698][38728] Decorrelating experience for 192 frames... -[2024-07-05 11:29:20,784][38749] Decorrelating experience for 192 frames... -[2024-07-05 11:29:20,797][38748] Decorrelating experience for 160 frames... -[2024-07-05 11:29:20,823][38754] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,827][38750] Decorrelating experience for 160 frames... -[2024-07-05 11:29:20,846][38752] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,869][38726] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,890][38727] Decorrelating experience for 96 frames... -[2024-07-05 11:29:20,905][38732] Decorrelating experience for 128 frames... -[2024-07-05 11:29:20,997][38729] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,014][38751] Decorrelating experience for 128 frames... -[2024-07-05 11:29:21,052][38753] Decorrelating experience for 224 frames... -[2024-07-05 11:29:21,076][38728] Decorrelating experience for 224 frames... -[2024-07-05 11:29:21,088][38754] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,107][38752] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,135][38731] Decorrelating experience for 128 frames... -[2024-07-05 11:29:21,192][38748] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,225][38730] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,241][38749] Decorrelating experience for 224 frames... -[2024-07-05 11:29:21,285][38729] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,305][38727] Decorrelating experience for 128 frames... -[2024-07-05 11:29:21,350][38732] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,421][38731] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,431][38751] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,438][38725] Decorrelating experience for 128 frames... -[2024-07-05 11:29:21,447][38730] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,478][38754] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,488][38752] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,560][38748] Decorrelating experience for 224 frames... -[2024-07-05 11:29:21,582][38729] Decorrelating experience for 224 frames... -[2024-07-05 11:29:21,611][38750] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,689][38751] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,706][38726] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,714][38725] Decorrelating experience for 160 frames... -[2024-07-05 11:29:21,773][38732] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,812][38731] Decorrelating experience for 192 frames... -[2024-07-05 11:29:21,872][38724] Decorrelating experience for 128 frames... -[2024-07-05 11:29:21,914][38752] Decorrelating experience for 224 frames... -[2024-07-05 11:29:21,945][38750] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,003][38730] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,026][38725] Decorrelating experience for 192 frames... -[2024-07-05 11:29:22,129][38754] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,156][38724] Decorrelating experience for 160 frames... -[2024-07-05 11:29:22,163][38726] Decorrelating experience for 192 frames... -[2024-07-05 11:29:22,165][38731] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,256][38727] Decorrelating experience for 160 frames... -[2024-07-05 11:29:22,381][38751] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,404][38732] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,436][38724] Decorrelating experience for 192 frames... -[2024-07-05 11:29:22,508][38726] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,580][38727] Decorrelating experience for 192 frames... -[2024-07-05 11:29:22,619][38725] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,633][38703] Signal inference workers to stop experience collection... -[2024-07-05 11:29:22,640][38723] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 11:29:22,746][38724] Decorrelating experience for 224 frames... -[2024-07-05 11:29:22,834][38727] Decorrelating experience for 224 frames... -[2024-07-05 11:29:24,033][38703] Signal inference workers to resume experience collection... -[2024-07-05 11:29:24,033][38723] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 11:29:24,685][25826] Fps is (10 sec: 4915.0, 60 sec: 4915.0, 300 sec: 4915.0). Total num frames: 50036736. Throughput: 0: 1184.8. Samples: 5924. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:29:24,686][25826] Avg episode reward: [(0, '2.600')] -[2024-07-05 11:29:25,876][38723] Updated weights for policy 0, policy_version 8557 (0.0098) -[2024-07-05 11:29:27,605][38723] Updated weights for policy 0, policy_version 8567 (0.0007) -[2024-07-05 11:29:29,316][38723] Updated weights for policy 0, policy_version 8577 (0.0008) -[2024-07-05 11:29:29,685][25826] Fps is (10 sec: 26214.3, 60 sec: 26214.3, 300 sec: 26214.3). Total num frames: 50274304. Throughput: 0: 3757.6. Samples: 37576. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:29:29,686][25826] Avg episode reward: [(0, '41.948')] -[2024-07-05 11:29:30,957][38723] Updated weights for policy 0, policy_version 8587 (0.0010) -[2024-07-05 11:29:32,678][38723] Updated weights for policy 0, policy_version 8597 (0.0011) -[2024-07-05 11:29:32,691][25826] Heartbeat connected on Batcher_0 -[2024-07-05 11:29:32,694][25826] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 11:29:32,704][25826] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 11:29:32,705][25826] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 11:29:32,707][25826] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 11:29:32,712][25826] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 11:29:32,715][25826] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 11:29:32,720][25826] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 11:29:32,721][25826] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 11:29:32,724][25826] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 11:29:32,728][25826] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 11:29:32,730][25826] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 11:29:32,733][25826] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 11:29:32,735][25826] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 11:29:32,741][25826] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 11:29:32,747][25826] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 11:29:32,748][25826] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 11:29:32,749][25826] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 11:29:32,757][25826] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 11:29:34,382][38723] Updated weights for policy 0, policy_version 8607 (0.0008) -[2024-07-05 11:29:34,685][25826] Fps is (10 sec: 47514.0, 60 sec: 33314.0, 300 sec: 33314.0). Total num frames: 50511872. Throughput: 0: 7376.8. Samples: 110652. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:29:34,686][25826] Avg episode reward: [(0, '41.051')] -[2024-07-05 11:29:36,049][38723] Updated weights for policy 0, policy_version 8617 (0.0008) -[2024-07-05 11:29:37,714][38723] Updated weights for policy 0, policy_version 8627 (0.0008) -[2024-07-05 11:29:39,457][38723] Updated weights for policy 0, policy_version 8637 (0.0008) -[2024-07-05 11:29:39,685][25826] Fps is (10 sec: 48332.8, 60 sec: 37273.5, 300 sec: 37273.5). Total num frames: 50757632. Throughput: 0: 9148.0. Samples: 182960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:29:39,686][25826] Avg episode reward: [(0, '41.935')] -[2024-07-05 11:29:41,209][38723] Updated weights for policy 0, policy_version 8647 (0.0010) -[2024-07-05 11:29:42,894][38723] Updated weights for policy 0, policy_version 8657 (0.0007) -[2024-07-05 11:29:44,658][38723] Updated weights for policy 0, policy_version 8667 (0.0007) -[2024-07-05 11:29:44,685][25826] Fps is (10 sec: 48332.9, 60 sec: 39321.5, 300 sec: 39321.5). Total num frames: 50995200. Throughput: 0: 8746.9. Samples: 218672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:29:44,686][25826] Avg episode reward: [(0, '42.375')] -[2024-07-05 11:29:46,487][38723] Updated weights for policy 0, policy_version 8677 (0.0008) -[2024-07-05 11:29:48,158][38723] Updated weights for policy 0, policy_version 8687 (0.0008) -[2024-07-05 11:29:49,685][25826] Fps is (10 sec: 46694.0, 60 sec: 40413.7, 300 sec: 40413.7). Total num frames: 51224576. Throughput: 0: 9633.0. Samples: 288992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:29:49,686][25826] Avg episode reward: [(0, '40.447')] -[2024-07-05 11:29:49,846][38723] Updated weights for policy 0, policy_version 8697 (0.0008) -[2024-07-05 11:29:51,503][38723] Updated weights for policy 0, policy_version 8707 (0.0008) -[2024-07-05 11:29:53,199][38723] Updated weights for policy 0, policy_version 8717 (0.0009) -[2024-07-05 11:29:54,685][25826] Fps is (10 sec: 48332.9, 60 sec: 41896.2, 300 sec: 41896.2). Total num frames: 51478528. Throughput: 0: 10364.2. Samples: 362748. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:29:54,686][25826] Avg episode reward: [(0, '43.297')] -[2024-07-05 11:29:54,819][38723] Updated weights for policy 0, policy_version 8727 (0.0008) -[2024-07-05 11:29:56,441][38723] Updated weights for policy 0, policy_version 8737 (0.0008) -[2024-07-05 11:29:58,076][38723] Updated weights for policy 0, policy_version 8747 (0.0008) -[2024-07-05 11:29:59,685][25826] Fps is (10 sec: 49971.5, 60 sec: 42803.1, 300 sec: 42803.1). Total num frames: 51724288. Throughput: 0: 10002.4. Samples: 400096. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:29:59,686][25826] Avg episode reward: [(0, '41.127')] -[2024-07-05 11:29:59,748][38723] Updated weights for policy 0, policy_version 8757 (0.0008) -[2024-07-05 11:30:01,393][38723] Updated weights for policy 0, policy_version 8767 (0.0008) -[2024-07-05 11:30:02,983][38723] Updated weights for policy 0, policy_version 8777 (0.0009) -[2024-07-05 11:30:04,598][38723] Updated weights for policy 0, policy_version 8787 (0.0008) -[2024-07-05 11:30:04,685][25826] Fps is (10 sec: 49971.3, 60 sec: 43690.6, 300 sec: 43690.6). Total num frames: 51978240. Throughput: 0: 10563.5. Samples: 475360. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:30:04,686][25826] Avg episode reward: [(0, '41.260')] -[2024-07-05 11:30:06,214][38723] Updated weights for policy 0, policy_version 8797 (0.0008) -[2024-07-05 11:30:07,838][38723] Updated weights for policy 0, policy_version 8807 (0.0007) -[2024-07-05 11:30:09,468][38723] Updated weights for policy 0, policy_version 8817 (0.0008) -[2024-07-05 11:30:09,686][25826] Fps is (10 sec: 50781.8, 60 sec: 44399.1, 300 sec: 44399.1). Total num frames: 52232192. Throughput: 0: 12115.5. Samples: 551140. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:09,687][25826] Avg episode reward: [(0, '42.418')] -[2024-07-05 11:30:11,115][38723] Updated weights for policy 0, policy_version 8827 (0.0007) -[2024-07-05 11:30:12,781][38723] Updated weights for policy 0, policy_version 8837 (0.0009) -[2024-07-05 11:30:14,456][38723] Updated weights for policy 0, policy_version 8847 (0.0008) -[2024-07-05 11:30:14,685][25826] Fps is (10 sec: 49971.2, 60 sec: 44832.6, 300 sec: 44832.6). Total num frames: 52477952. Throughput: 0: 12240.5. Samples: 588400. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:14,686][25826] Avg episode reward: [(0, '41.754')] -[2024-07-05 11:30:16,131][38723] Updated weights for policy 0, policy_version 8857 (0.0008) -[2024-07-05 11:30:17,842][38723] Updated weights for policy 0, policy_version 8867 (0.0008) -[2024-07-05 11:30:19,528][38723] Updated weights for policy 0, policy_version 8877 (0.0007) -[2024-07-05 11:30:19,685][25826] Fps is (10 sec: 49159.7, 60 sec: 45192.4, 300 sec: 45192.4). Total num frames: 52723712. Throughput: 0: 12246.8. Samples: 661760. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:19,686][25826] Avg episode reward: [(0, '37.754')] -[2024-07-05 11:30:21,177][38723] Updated weights for policy 0, policy_version 8887 (0.0008) -[2024-07-05 11:30:22,835][38723] Updated weights for policy 0, policy_version 8897 (0.0008) -[2024-07-05 11:30:24,481][38723] Updated weights for policy 0, policy_version 8907 (0.0009) -[2024-07-05 11:30:24,685][25826] Fps is (10 sec: 49151.9, 60 sec: 48879.0, 300 sec: 45497.1). Total num frames: 52969472. Throughput: 0: 12288.2. Samples: 735928. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:24,686][25826] Avg episode reward: [(0, '40.017')] -[2024-07-05 11:30:26,257][38723] Updated weights for policy 0, policy_version 8917 (0.0008) -[2024-07-05 11:30:27,921][38723] Updated weights for policy 0, policy_version 8927 (0.0007) -[2024-07-05 11:30:29,563][38723] Updated weights for policy 0, policy_version 8937 (0.0008) -[2024-07-05 11:30:29,685][25826] Fps is (10 sec: 48333.7, 60 sec: 48879.0, 300 sec: 45641.1). Total num frames: 53207040. Throughput: 0: 12271.0. Samples: 770868. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:30:29,686][25826] Avg episode reward: [(0, '41.130')] -[2024-07-05 11:30:31,446][38723] Updated weights for policy 0, policy_version 8947 (0.0012) -[2024-07-05 11:30:33,259][38723] Updated weights for policy 0, policy_version 8957 (0.0011) -[2024-07-05 11:30:34,685][25826] Fps is (10 sec: 46694.4, 60 sec: 48742.4, 300 sec: 45656.7). Total num frames: 53436416. Throughput: 0: 12257.0. Samples: 840556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:30:34,686][25826] Avg episode reward: [(0, '38.117')] -[2024-07-05 11:30:34,973][38723] Updated weights for policy 0, policy_version 8967 (0.0008) -[2024-07-05 11:30:36,631][38723] Updated weights for policy 0, policy_version 8977 (0.0008) -[2024-07-05 11:30:38,296][38723] Updated weights for policy 0, policy_version 8987 (0.0010) -[2024-07-05 11:30:39,685][25826] Fps is (10 sec: 47512.4, 60 sec: 48742.2, 300 sec: 45875.1). Total num frames: 53682176. Throughput: 0: 12247.4. Samples: 913884. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:39,686][25826] Avg episode reward: [(0, '41.987')] -[2024-07-05 11:30:39,962][38723] Updated weights for policy 0, policy_version 8997 (0.0007) -[2024-07-05 11:30:41,606][38723] Updated weights for policy 0, policy_version 9007 (0.0010) -[2024-07-05 11:30:43,337][38723] Updated weights for policy 0, policy_version 9017 (0.0008) -[2024-07-05 11:30:44,685][25826] Fps is (10 sec: 49152.2, 60 sec: 48879.0, 300 sec: 46068.0). Total num frames: 53927936. Throughput: 0: 12239.3. Samples: 950864. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:44,686][25826] Avg episode reward: [(0, '42.187')] -[2024-07-05 11:30:45,018][38723] Updated weights for policy 0, policy_version 9027 (0.0010) -[2024-07-05 11:30:46,713][38723] Updated weights for policy 0, policy_version 9037 (0.0008) -[2024-07-05 11:30:48,377][38723] Updated weights for policy 0, policy_version 9047 (0.0007) -[2024-07-05 11:30:49,685][25826] Fps is (10 sec: 48333.8, 60 sec: 49015.5, 300 sec: 46148.3). Total num frames: 54165504. Throughput: 0: 12192.0. Samples: 1024000. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:49,686][25826] Avg episode reward: [(0, '40.449')] -[2024-07-05 11:30:50,035][38723] Updated weights for policy 0, policy_version 9057 (0.0008) -[2024-07-05 11:30:51,716][38723] Updated weights for policy 0, policy_version 9067 (0.0010) -[2024-07-05 11:30:53,345][38723] Updated weights for policy 0, policy_version 9077 (0.0008) -[2024-07-05 11:30:54,685][25826] Fps is (10 sec: 48333.0, 60 sec: 48879.0, 300 sec: 46306.4). Total num frames: 54411264. Throughput: 0: 12145.4. Samples: 1097660. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:30:54,686][25826] Avg episode reward: [(0, '42.317')] -[2024-07-05 11:30:55,032][38723] Updated weights for policy 0, policy_version 9087 (0.0008) -[2024-07-05 11:30:56,693][38723] Updated weights for policy 0, policy_version 9097 (0.0008) -[2024-07-05 11:30:58,333][38723] Updated weights for policy 0, policy_version 9107 (0.0009) -[2024-07-05 11:30:59,685][25826] Fps is (10 sec: 49151.9, 60 sec: 48879.0, 300 sec: 46448.6). Total num frames: 54657024. Throughput: 0: 12133.1. Samples: 1134392. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:30:59,686][25826] Avg episode reward: [(0, '44.633')] -[2024-07-05 11:31:00,018][38723] Updated weights for policy 0, policy_version 9117 (0.0008) -[2024-07-05 11:31:01,692][38723] Updated weights for policy 0, policy_version 9127 (0.0008) -[2024-07-05 11:31:03,360][38723] Updated weights for policy 0, policy_version 9137 (0.0007) -[2024-07-05 11:31:04,685][25826] Fps is (10 sec: 49151.9, 60 sec: 48742.4, 300 sec: 46577.4). Total num frames: 54902784. Throughput: 0: 12144.5. Samples: 1208260. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:31:04,686][25826] Avg episode reward: [(0, '44.489')] -[2024-07-05 11:31:05,048][38723] Updated weights for policy 0, policy_version 9147 (0.0010) -[2024-07-05 11:31:06,690][38723] Updated weights for policy 0, policy_version 9157 (0.0012) -[2024-07-05 11:31:08,355][38723] Updated weights for policy 0, policy_version 9167 (0.0007) -[2024-07-05 11:31:09,685][25826] Fps is (10 sec: 49151.1, 60 sec: 48607.1, 300 sec: 46694.3). Total num frames: 55148544. Throughput: 0: 12132.8. Samples: 1281908. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:31:09,686][25826] Avg episode reward: [(0, '45.678')] -[2024-07-05 11:31:09,699][38703] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000009175_55156736.pth... -[2024-07-05 11:31:09,778][38703] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008166_46891008.pth -[2024-07-05 11:31:09,781][38703] Saving new best policy, reward=45.678! -[2024-07-05 11:31:10,057][38723] Updated weights for policy 0, policy_version 9177 (0.0008) -[2024-07-05 11:31:11,752][38723] Updated weights for policy 0, policy_version 9187 (0.0008) -[2024-07-05 11:31:13,453][38723] Updated weights for policy 0, policy_version 9197 (0.0008) -[2024-07-05 11:31:14,685][25826] Fps is (10 sec: 49151.7, 60 sec: 48605.8, 300 sec: 46801.2). Total num frames: 55394304. Throughput: 0: 12156.9. Samples: 1317928. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:31:14,686][25826] Avg episode reward: [(0, '44.333')] -[2024-07-05 11:31:15,148][38723] Updated weights for policy 0, policy_version 9207 (0.0010) -[2024-07-05 11:31:16,796][38723] Updated weights for policy 0, policy_version 9217 (0.0010) -[2024-07-05 11:31:18,465][38723] Updated weights for policy 0, policy_version 9227 (0.0011) -[2024-07-05 11:31:19,685][25826] Fps is (10 sec: 49152.7, 60 sec: 48606.0, 300 sec: 46899.2). Total num frames: 55640064. Throughput: 0: 12237.7. Samples: 1391252. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:31:19,686][25826] Avg episode reward: [(0, '40.940')] -[2024-07-05 11:31:20,166][38723] Updated weights for policy 0, policy_version 9237 (0.0010) -[2024-07-05 11:31:21,818][38723] Updated weights for policy 0, policy_version 9247 (0.0011) -[2024-07-05 11:31:23,501][38723] Updated weights for policy 0, policy_version 9257 (0.0009) -[2024-07-05 11:31:24,685][25826] Fps is (10 sec: 49151.3, 60 sec: 48605.7, 300 sec: 46989.2). Total num frames: 55885824. Throughput: 0: 12235.5. Samples: 1464480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:31:24,686][25826] Avg episode reward: [(0, '43.398')] -[2024-07-05 11:31:25,171][38723] Updated weights for policy 0, policy_version 9267 (0.0007) -[2024-07-05 11:31:26,822][38723] Updated weights for policy 0, policy_version 9277 (0.0010) -[2024-07-05 11:31:28,495][38723] Updated weights for policy 0, policy_version 9287 (0.0008) -[2024-07-05 11:31:29,685][25826] Fps is (10 sec: 48333.3, 60 sec: 48605.9, 300 sec: 47009.5). Total num frames: 56123392. Throughput: 0: 12232.3. Samples: 1501316. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:31:29,686][25826] Avg episode reward: [(0, '42.308')] -[2024-07-05 11:31:30,161][38723] Updated weights for policy 0, policy_version 9297 (0.0008) -[2024-07-05 11:31:31,847][38723] Updated weights for policy 0, policy_version 9307 (0.0007) -[2024-07-05 11:31:33,566][38723] Updated weights for policy 0, policy_version 9317 (0.0008) -[2024-07-05 11:31:34,685][25826] Fps is (10 sec: 48333.3, 60 sec: 48878.9, 300 sec: 47088.8). Total num frames: 56369152. Throughput: 0: 12230.1. Samples: 1574356. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:31:34,686][25826] Avg episode reward: [(0, '46.047')] -[2024-07-05 11:31:34,687][38703] Saving new best policy, reward=46.047! -[2024-07-05 11:31:35,284][38723] Updated weights for policy 0, policy_version 9327 (0.0007) -[2024-07-05 11:31:37,025][38723] Updated weights for policy 0, policy_version 9337 (0.0011) -[2024-07-05 11:31:38,748][38723] Updated weights for policy 0, policy_version 9347 (0.0012) -[2024-07-05 11:31:39,685][25826] Fps is (10 sec: 48332.4, 60 sec: 48742.5, 300 sec: 47104.0). Total num frames: 56606720. Throughput: 0: 12177.6. Samples: 1645652. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:31:39,686][25826] Avg episode reward: [(0, '44.094')] -[2024-07-05 11:31:40,404][38723] Updated weights for policy 0, policy_version 9357 (0.0007) -[2024-07-05 11:31:42,127][38723] Updated weights for policy 0, policy_version 9367 (0.0008) -[2024-07-05 11:31:43,831][38723] Updated weights for policy 0, policy_version 9377 (0.0010) -[2024-07-05 11:31:44,685][25826] Fps is (10 sec: 47513.6, 60 sec: 48605.8, 300 sec: 47118.1). Total num frames: 56844288. Throughput: 0: 12159.5. Samples: 1681568. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:31:44,686][25826] Avg episode reward: [(0, '44.232')] -[2024-07-05 11:31:45,566][38723] Updated weights for policy 0, policy_version 9387 (0.0011) -[2024-07-05 11:31:47,226][38723] Updated weights for policy 0, policy_version 9397 (0.0008) -[2024-07-05 11:31:48,907][38723] Updated weights for policy 0, policy_version 9407 (0.0009) -[2024-07-05 11:31:49,685][25826] Fps is (10 sec: 48333.3, 60 sec: 48742.5, 300 sec: 47185.9). Total num frames: 57090048. Throughput: 0: 12130.9. Samples: 1754148. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:31:49,686][25826] Avg episode reward: [(0, '44.739')] -[2024-07-05 11:31:50,594][38723] Updated weights for policy 0, policy_version 9417 (0.0009) -[2024-07-05 11:31:52,255][38723] Updated weights for policy 0, policy_version 9427 (0.0008) -[2024-07-05 11:31:53,911][38723] Updated weights for policy 0, policy_version 9437 (0.0008) -[2024-07-05 11:31:54,685][25826] Fps is (10 sec: 49152.2, 60 sec: 48742.3, 300 sec: 47249.3). Total num frames: 57335808. Throughput: 0: 12130.6. Samples: 1827784. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:31:54,686][25826] Avg episode reward: [(0, '42.822')] -[2024-07-05 11:31:55,668][38723] Updated weights for policy 0, policy_version 9447 (0.0008) -[2024-07-05 11:31:57,334][38723] Updated weights for policy 0, policy_version 9457 (0.0010) -[2024-07-05 11:31:59,045][38723] Updated weights for policy 0, policy_version 9467 (0.0007) -[2024-07-05 11:31:59,685][25826] Fps is (10 sec: 48332.4, 60 sec: 48605.9, 300 sec: 47257.6). Total num frames: 57573376. Throughput: 0: 12129.8. Samples: 1863768. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:31:59,686][25826] Avg episode reward: [(0, '42.452')] -[2024-07-05 11:32:00,815][38723] Updated weights for policy 0, policy_version 9477 (0.0010) -[2024-07-05 11:32:02,547][38723] Updated weights for policy 0, policy_version 9487 (0.0008) -[2024-07-05 11:32:04,228][38723] Updated weights for policy 0, policy_version 9497 (0.0007) -[2024-07-05 11:32:04,685][25826] Fps is (10 sec: 47513.0, 60 sec: 48469.2, 300 sec: 47265.3). Total num frames: 57810944. Throughput: 0: 12082.2. Samples: 1934952. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:32:04,686][25826] Avg episode reward: [(0, '42.779')] -[2024-07-05 11:32:06,025][38723] Updated weights for policy 0, policy_version 9507 (0.0008) -[2024-07-05 11:32:07,725][38723] Updated weights for policy 0, policy_version 9517 (0.0008) -[2024-07-05 11:32:09,573][38723] Updated weights for policy 0, policy_version 9527 (0.0012) -[2024-07-05 11:32:09,685][25826] Fps is (10 sec: 46694.2, 60 sec: 48196.4, 300 sec: 47224.5). Total num frames: 58040320. Throughput: 0: 11994.3. Samples: 2004220. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:32:09,686][25826] Avg episode reward: [(0, '42.466')] -[2024-07-05 11:32:11,449][38723] Updated weights for policy 0, policy_version 9537 (0.0009) -[2024-07-05 11:32:13,338][38723] Updated weights for policy 0, policy_version 9547 (0.0010) -[2024-07-05 11:32:14,685][25826] Fps is (10 sec: 45055.6, 60 sec: 47786.5, 300 sec: 47139.0). Total num frames: 58261504. Throughput: 0: 11915.4. Samples: 2037512. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:32:14,686][25826] Avg episode reward: [(0, '41.732')] -[2024-07-05 11:32:15,210][38723] Updated weights for policy 0, policy_version 9557 (0.0008) -[2024-07-05 11:32:17,141][38723] Updated weights for policy 0, policy_version 9567 (0.0011) -[2024-07-05 11:32:19,064][38723] Updated weights for policy 0, policy_version 9577 (0.0010) -[2024-07-05 11:32:19,685][25826] Fps is (10 sec: 43417.1, 60 sec: 47240.5, 300 sec: 47012.9). Total num frames: 58474496. Throughput: 0: 11730.6. Samples: 2102236. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:32:19,686][25826] Avg episode reward: [(0, '42.850')] -[2024-07-05 11:32:20,907][38723] Updated weights for policy 0, policy_version 9587 (0.0009) -[2024-07-05 11:32:22,724][38723] Updated weights for policy 0, policy_version 9597 (0.0011) -[2024-07-05 11:32:24,467][38723] Updated weights for policy 0, policy_version 9607 (0.0011) -[2024-07-05 11:32:24,687][25826] Fps is (10 sec: 43410.6, 60 sec: 46829.6, 300 sec: 46937.5). Total num frames: 58695680. Throughput: 0: 11627.3. Samples: 2168900. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:32:24,695][25826] Avg episode reward: [(0, '45.385')] -[2024-07-05 11:32:26,254][38723] Updated weights for policy 0, policy_version 9617 (0.0008) -[2024-07-05 11:32:28,057][38723] Updated weights for policy 0, policy_version 9627 (0.0010) -[2024-07-05 11:32:29,685][25826] Fps is (10 sec: 45874.9, 60 sec: 46830.7, 300 sec: 46953.0). Total num frames: 58933248. Throughput: 0: 11605.5. Samples: 2203816. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:32:29,686][25826] Avg episode reward: [(0, '39.768')] -[2024-07-05 11:32:29,884][38723] Updated weights for policy 0, policy_version 9637 (0.0011) -[2024-07-05 11:32:31,665][38723] Updated weights for policy 0, policy_version 9647 (0.0011) -[2024-07-05 11:32:33,378][38723] Updated weights for policy 0, policy_version 9657 (0.0008) -[2024-07-05 11:32:34,685][25826] Fps is (10 sec: 46703.2, 60 sec: 46557.9, 300 sec: 46925.5). Total num frames: 59162624. Throughput: 0: 11533.7. Samples: 2273164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:32:34,686][25826] Avg episode reward: [(0, '39.758')] -[2024-07-05 11:32:35,176][38723] Updated weights for policy 0, policy_version 9667 (0.0007) -[2024-07-05 11:32:36,970][38723] Updated weights for policy 0, policy_version 9677 (0.0008) -[2024-07-05 11:32:38,735][38723] Updated weights for policy 0, policy_version 9687 (0.0010) -[2024-07-05 11:32:39,685][25826] Fps is (10 sec: 45873.6, 60 sec: 46420.9, 300 sec: 46899.1). Total num frames: 59392000. Throughput: 0: 11425.6. Samples: 2341944. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:32:39,687][25826] Avg episode reward: [(0, '41.020')] -[2024-07-05 11:32:40,506][38723] Updated weights for policy 0, policy_version 9697 (0.0008) -[2024-07-05 11:32:42,257][38723] Updated weights for policy 0, policy_version 9707 (0.0010) -[2024-07-05 11:32:44,020][38723] Updated weights for policy 0, policy_version 9717 (0.0008) -[2024-07-05 11:32:44,685][25826] Fps is (10 sec: 45875.2, 60 sec: 46284.9, 300 sec: 46874.2). Total num frames: 59621376. Throughput: 0: 11398.1. Samples: 2376680. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:32:44,686][25826] Avg episode reward: [(0, '42.325')] -[2024-07-05 11:32:45,816][38723] Updated weights for policy 0, policy_version 9727 (0.0008) -[2024-07-05 11:32:47,603][38723] Updated weights for policy 0, policy_version 9737 (0.0009) -[2024-07-05 11:32:49,388][38723] Updated weights for policy 0, policy_version 9747 (0.0008) -[2024-07-05 11:32:49,685][25826] Fps is (10 sec: 45876.7, 60 sec: 46011.5, 300 sec: 46850.4). Total num frames: 59850752. Throughput: 0: 11351.1. Samples: 2445752. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:32:49,686][25826] Avg episode reward: [(0, '43.951')] -[2024-07-05 11:32:51,170][38723] Updated weights for policy 0, policy_version 9757 (0.0009) -[2024-07-05 11:32:52,966][38723] Updated weights for policy 0, policy_version 9767 (0.0012) -[2024-07-05 11:32:54,685][25826] Fps is (10 sec: 45875.2, 60 sec: 45738.7, 300 sec: 46827.8). Total num frames: 60080128. Throughput: 0: 11344.0. Samples: 2514700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:32:54,685][25826] Avg episode reward: [(0, '40.332')] -[2024-07-05 11:32:54,750][38723] Updated weights for policy 0, policy_version 9777 (0.0008) -[2024-07-05 11:32:56,515][38723] Updated weights for policy 0, policy_version 9787 (0.0008) -[2024-07-05 11:32:58,243][38723] Updated weights for policy 0, policy_version 9797 (0.0008) -[2024-07-05 11:32:59,685][25826] Fps is (10 sec: 46694.8, 60 sec: 45738.6, 300 sec: 46843.3). Total num frames: 60317696. Throughput: 0: 11377.8. Samples: 2549512. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:32:59,686][25826] Avg episode reward: [(0, '42.735')] -[2024-07-05 11:33:00,056][38723] Updated weights for policy 0, policy_version 9807 (0.0008) -[2024-07-05 11:33:01,807][38723] Updated weights for policy 0, policy_version 9817 (0.0008) -[2024-07-05 11:33:03,552][38723] Updated weights for policy 0, policy_version 9827 (0.0007) -[2024-07-05 11:33:04,685][25826] Fps is (10 sec: 46694.1, 60 sec: 45602.2, 300 sec: 46821.8). Total num frames: 60547072. Throughput: 0: 11473.1. Samples: 2618524. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:33:04,686][25826] Avg episode reward: [(0, '42.950')] -[2024-07-05 11:33:05,334][38723] Updated weights for policy 0, policy_version 9837 (0.0008) -[2024-07-05 11:33:07,127][38723] Updated weights for policy 0, policy_version 9847 (0.0010) -[2024-07-05 11:33:08,908][38723] Updated weights for policy 0, policy_version 9857 (0.0008) -[2024-07-05 11:33:09,685][25826] Fps is (10 sec: 45874.4, 60 sec: 45601.9, 300 sec: 46801.2). Total num frames: 60776448. Throughput: 0: 11535.4. Samples: 2687976. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:33:09,686][25826] Avg episode reward: [(0, '43.061')] -[2024-07-05 11:33:09,701][38703] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000009861_60776448.pth... -[2024-07-05 11:33:09,780][38703] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000008547_50012160.pth -[2024-07-05 11:33:10,769][38723] Updated weights for policy 0, policy_version 9867 (0.0008) -[2024-07-05 11:33:12,512][38723] Updated weights for policy 0, policy_version 9877 (0.0008) -[2024-07-05 11:33:14,292][38723] Updated weights for policy 0, policy_version 9887 (0.0010) -[2024-07-05 11:33:14,685][25826] Fps is (10 sec: 45874.8, 60 sec: 45738.8, 300 sec: 46781.5). Total num frames: 61005824. Throughput: 0: 11526.0. Samples: 2722484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:33:14,686][25826] Avg episode reward: [(0, '42.037')] -[2024-07-05 11:33:16,113][38723] Updated weights for policy 0, policy_version 9897 (0.0008) -[2024-07-05 11:33:17,853][38723] Updated weights for policy 0, policy_version 9907 (0.0009) -[2024-07-05 11:33:19,644][38723] Updated weights for policy 0, policy_version 9917 (0.0010) -[2024-07-05 11:33:19,685][25826] Fps is (10 sec: 45876.4, 60 sec: 46011.8, 300 sec: 46762.7). Total num frames: 61235200. Throughput: 0: 11513.5. Samples: 2791272. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:33:19,686][25826] Avg episode reward: [(0, '42.332')] -[2024-07-05 11:33:21,398][38723] Updated weights for policy 0, policy_version 9927 (0.0011) -[2024-07-05 11:33:23,211][38723] Updated weights for policy 0, policy_version 9937 (0.0008) -[2024-07-05 11:33:24,685][25826] Fps is (10 sec: 45875.8, 60 sec: 46149.7, 300 sec: 46744.6). Total num frames: 61464576. Throughput: 0: 11508.4. Samples: 2859816. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:33:24,686][25826] Avg episode reward: [(0, '44.534')] -[2024-07-05 11:33:24,964][38723] Updated weights for policy 0, policy_version 9947 (0.0008) -[2024-07-05 11:33:26,751][38723] Updated weights for policy 0, policy_version 9957 (0.0009) -[2024-07-05 11:33:28,518][38723] Updated weights for policy 0, policy_version 9967 (0.0010) -[2024-07-05 11:33:29,685][25826] Fps is (10 sec: 45874.3, 60 sec: 46011.7, 300 sec: 46727.1). Total num frames: 61693952. Throughput: 0: 11500.9. Samples: 2894224. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:33:29,686][25826] Avg episode reward: [(0, '45.644')] -[2024-07-05 11:33:30,335][38723] Updated weights for policy 0, policy_version 9977 (0.0008) -[2024-07-05 11:33:32,137][38723] Updated weights for policy 0, policy_version 9987 (0.0012) -[2024-07-05 11:33:33,958][38723] Updated weights for policy 0, policy_version 9997 (0.0008) -[2024-07-05 11:33:34,685][25826] Fps is (10 sec: 45875.2, 60 sec: 46011.7, 300 sec: 46710.5). Total num frames: 61923328. Throughput: 0: 11497.2. Samples: 2963124. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:33:34,686][25826] Avg episode reward: [(0, '45.409')] -[2024-07-05 11:33:35,714][38723] Updated weights for policy 0, policy_version 10007 (0.0009) -[2024-07-05 11:33:37,502][38723] Updated weights for policy 0, policy_version 10017 (0.0008) -[2024-07-05 11:33:39,288][38723] Updated weights for policy 0, policy_version 10027 (0.0010) -[2024-07-05 11:33:39,685][25826] Fps is (10 sec: 45876.2, 60 sec: 46012.1, 300 sec: 46694.4). Total num frames: 62152704. Throughput: 0: 11497.2. Samples: 3032076. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:33:39,686][25826] Avg episode reward: [(0, '40.477')] -[2024-07-05 11:33:41,053][38723] Updated weights for policy 0, policy_version 10037 (0.0008) -[2024-07-05 11:33:42,855][38723] Updated weights for policy 0, policy_version 10047 (0.0012) -[2024-07-05 11:33:44,685][38723] Updated weights for policy 0, policy_version 10057 (0.0010) -[2024-07-05 11:33:44,685][25826] Fps is (10 sec: 45874.4, 60 sec: 46011.6, 300 sec: 46678.9). Total num frames: 62382080. Throughput: 0: 11485.4. Samples: 3066356. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:33:44,686][25826] Avg episode reward: [(0, '41.428')] -[2024-07-05 11:33:46,404][38723] Updated weights for policy 0, policy_version 10067 (0.0007) -[2024-07-05 11:33:48,139][38723] Updated weights for policy 0, policy_version 10077 (0.0008) -[2024-07-05 11:33:49,685][25826] Fps is (10 sec: 46694.7, 60 sec: 46148.5, 300 sec: 46694.4). Total num frames: 62619648. Throughput: 0: 11501.7. Samples: 3136100. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:33:49,685][25826] Avg episode reward: [(0, '44.278')] -[2024-07-05 11:33:49,815][38723] Updated weights for policy 0, policy_version 10087 (0.0008) -[2024-07-05 11:33:51,512][38723] Updated weights for policy 0, policy_version 10097 (0.0013) -[2024-07-05 11:33:53,219][38723] Updated weights for policy 0, policy_version 10107 (0.0009) -[2024-07-05 11:33:54,685][25826] Fps is (10 sec: 47514.0, 60 sec: 46284.7, 300 sec: 46709.3). Total num frames: 62857216. Throughput: 0: 11564.9. Samples: 3208392. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:33:54,686][25826] Avg episode reward: [(0, '45.328')] -[2024-07-05 11:33:54,937][38723] Updated weights for policy 0, policy_version 10117 (0.0009) -[2024-07-05 11:33:56,714][38723] Updated weights for policy 0, policy_version 10127 (0.0007) -[2024-07-05 11:33:58,566][38723] Updated weights for policy 0, policy_version 10137 (0.0008) -[2024-07-05 11:33:59,685][25826] Fps is (10 sec: 45875.1, 60 sec: 46011.9, 300 sec: 46665.1). Total num frames: 63078400. Throughput: 0: 11564.8. Samples: 3242900. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:33:59,686][25826] Avg episode reward: [(0, '43.491')] -[2024-07-05 11:34:00,438][38723] Updated weights for policy 0, policy_version 10147 (0.0008) -[2024-07-05 11:34:02,254][38723] Updated weights for policy 0, policy_version 10157 (0.0009) -[2024-07-05 11:34:04,108][38723] Updated weights for policy 0, policy_version 10167 (0.0009) -[2024-07-05 11:34:04,685][25826] Fps is (10 sec: 45055.4, 60 sec: 46011.6, 300 sec: 46651.2). Total num frames: 63307776. Throughput: 0: 11526.8. Samples: 3309980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:34:04,686][25826] Avg episode reward: [(0, '44.719')] -[2024-07-05 11:34:05,959][38723] Updated weights for policy 0, policy_version 10177 (0.0008) -[2024-07-05 11:34:07,773][38723] Updated weights for policy 0, policy_version 10187 (0.0008) -[2024-07-05 11:34:09,573][38723] Updated weights for policy 0, policy_version 10197 (0.0008) -[2024-07-05 11:34:09,685][25826] Fps is (10 sec: 45055.8, 60 sec: 45875.4, 300 sec: 46609.6). Total num frames: 63528960. Throughput: 0: 11477.8. Samples: 3376320. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:34:09,686][25826] Avg episode reward: [(0, '46.024')] -[2024-07-05 11:34:11,472][38723] Updated weights for policy 0, policy_version 10207 (0.0008) -[2024-07-05 11:34:13,320][38723] Updated weights for policy 0, policy_version 10217 (0.0013) -[2024-07-05 11:34:14,685][25826] Fps is (10 sec: 44236.2, 60 sec: 45738.5, 300 sec: 46569.4). Total num frames: 63750144. Throughput: 0: 11454.9. Samples: 3409696. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:34:14,686][25826] Avg episode reward: [(0, '45.639')] -[2024-07-05 11:34:15,186][38723] Updated weights for policy 0, policy_version 10227 (0.0008) -[2024-07-05 11:34:16,999][38723] Updated weights for policy 0, policy_version 10237 (0.0008) -[2024-07-05 11:34:18,765][38723] Updated weights for policy 0, policy_version 10247 (0.0008) -[2024-07-05 11:34:19,685][25826] Fps is (10 sec: 45056.3, 60 sec: 45738.7, 300 sec: 47263.7). Total num frames: 63979520. Throughput: 0: 11415.1. Samples: 3476804. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:34:19,686][25826] Avg episode reward: [(0, '44.535')] -[2024-07-05 11:34:20,556][38723] Updated weights for policy 0, policy_version 10257 (0.0007) -[2024-07-05 11:34:22,341][38723] Updated weights for policy 0, policy_version 10267 (0.0008) -[2024-07-05 11:34:24,113][38723] Updated weights for policy 0, policy_version 10277 (0.0011) -[2024-07-05 11:34:24,685][25826] Fps is (10 sec: 45876.8, 60 sec: 45738.7, 300 sec: 47235.9). Total num frames: 64208896. Throughput: 0: 11418.9. Samples: 3545928. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:34:24,686][25826] Avg episode reward: [(0, '41.184')] -[2024-07-05 11:34:25,916][38723] Updated weights for policy 0, policy_version 10287 (0.0008) -[2024-07-05 11:34:27,679][38723] Updated weights for policy 0, policy_version 10297 (0.0009) -[2024-07-05 11:34:29,438][38723] Updated weights for policy 0, policy_version 10307 (0.0008) -[2024-07-05 11:34:29,685][25826] Fps is (10 sec: 45874.6, 60 sec: 45738.8, 300 sec: 47208.1). Total num frames: 64438272. Throughput: 0: 11418.1. Samples: 3580168. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:34:29,686][25826] Avg episode reward: [(0, '40.963')] -[2024-07-05 11:34:31,248][38723] Updated weights for policy 0, policy_version 10317 (0.0008) -[2024-07-05 11:34:33,024][38723] Updated weights for policy 0, policy_version 10327 (0.0011) -[2024-07-05 11:34:34,685][25826] Fps is (10 sec: 45875.1, 60 sec: 45738.7, 300 sec: 47152.6). Total num frames: 64667648. Throughput: 0: 11405.8. Samples: 3649360. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:34:34,686][25826] Avg episode reward: [(0, '42.031')] -[2024-07-05 11:34:34,830][38723] Updated weights for policy 0, policy_version 10337 (0.0010) -[2024-07-05 11:34:36,599][38723] Updated weights for policy 0, policy_version 10347 (0.0008) -[2024-07-05 11:34:38,354][38723] Updated weights for policy 0, policy_version 10357 (0.0008) -[2024-07-05 11:34:39,685][25826] Fps is (10 sec: 45875.7, 60 sec: 45738.7, 300 sec: 47124.8). Total num frames: 64897024. Throughput: 0: 11327.2. Samples: 3718116. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:34:39,686][25826] Avg episode reward: [(0, '42.049')] -[2024-07-05 11:34:40,127][38723] Updated weights for policy 0, policy_version 10367 (0.0008) -[2024-07-05 11:34:41,890][38723] Updated weights for policy 0, policy_version 10377 (0.0010) -[2024-07-05 11:34:43,694][38723] Updated weights for policy 0, policy_version 10387 (0.0011) -[2024-07-05 11:34:44,685][25826] Fps is (10 sec: 45875.4, 60 sec: 45738.8, 300 sec: 47124.9). Total num frames: 65126400. Throughput: 0: 11331.6. Samples: 3752820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:34:44,686][25826] Avg episode reward: [(0, '42.046')] -[2024-07-05 11:34:45,537][38723] Updated weights for policy 0, policy_version 10397 (0.0008) -[2024-07-05 11:34:47,337][38723] Updated weights for policy 0, policy_version 10407 (0.0011) -[2024-07-05 11:34:49,175][38723] Updated weights for policy 0, policy_version 10417 (0.0013) -[2024-07-05 11:34:49,685][25826] Fps is (10 sec: 45056.0, 60 sec: 45465.6, 300 sec: 47013.8). Total num frames: 65347584. Throughput: 0: 11349.4. Samples: 3820700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:34:49,686][25826] Avg episode reward: [(0, '42.435')] -[2024-07-05 11:34:51,020][38723] Updated weights for policy 0, policy_version 10427 (0.0008) -[2024-07-05 11:34:52,769][38723] Updated weights for policy 0, policy_version 10437 (0.0008) -[2024-07-05 11:34:54,558][38723] Updated weights for policy 0, policy_version 10447 (0.0008) -[2024-07-05 11:34:54,685][25826] Fps is (10 sec: 45054.6, 60 sec: 45328.9, 300 sec: 46958.2). Total num frames: 65576960. Throughput: 0: 11383.2. Samples: 3888568. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:34:54,686][25826] Avg episode reward: [(0, '42.821')] -[2024-07-05 11:34:56,346][38723] Updated weights for policy 0, policy_version 10457 (0.0010) -[2024-07-05 11:34:58,172][38723] Updated weights for policy 0, policy_version 10467 (0.0008) -[2024-07-05 11:34:59,685][25826] Fps is (10 sec: 45875.0, 60 sec: 45465.6, 300 sec: 46874.9). Total num frames: 65806336. Throughput: 0: 11405.7. Samples: 3922948. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:34:59,686][25826] Avg episode reward: [(0, '45.474')] -[2024-07-05 11:34:59,939][38723] Updated weights for policy 0, policy_version 10477 (0.0008) -[2024-07-05 11:35:01,713][38723] Updated weights for policy 0, policy_version 10487 (0.0008) -[2024-07-05 11:35:03,485][38723] Updated weights for policy 0, policy_version 10497 (0.0013) -[2024-07-05 11:35:04,685][25826] Fps is (10 sec: 45875.2, 60 sec: 45465.6, 300 sec: 46791.8). Total num frames: 66035712. Throughput: 0: 11442.8. Samples: 3991732. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:35:04,686][25826] Avg episode reward: [(0, '42.596')] -[2024-07-05 11:35:05,320][38723] Updated weights for policy 0, policy_version 10507 (0.0009) -[2024-07-05 11:35:07,183][38723] Updated weights for policy 0, policy_version 10517 (0.0008) -[2024-07-05 11:35:09,107][38723] Updated weights for policy 0, policy_version 10527 (0.0011) -[2024-07-05 11:35:09,685][25826] Fps is (10 sec: 45056.2, 60 sec: 45465.7, 300 sec: 46708.3). Total num frames: 66256896. Throughput: 0: 11380.6. Samples: 4058056. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:35:09,686][25826] Avg episode reward: [(0, '44.522')] -[2024-07-05 11:35:09,700][38703] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010530_66256896.pth... -[2024-07-05 11:35:09,778][38703] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000009175_55156736.pth -[2024-07-05 11:35:11,040][38723] Updated weights for policy 0, policy_version 10537 (0.0008) -[2024-07-05 11:35:12,947][38723] Updated weights for policy 0, policy_version 10547 (0.0010) -[2024-07-05 11:35:14,685][25826] Fps is (10 sec: 43418.8, 60 sec: 45329.3, 300 sec: 46597.2). Total num frames: 66469888. Throughput: 0: 11330.5. Samples: 4090040. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:35:14,686][25826] Avg episode reward: [(0, '42.765')] -[2024-07-05 11:35:14,735][38723] Updated weights for policy 0, policy_version 10557 (0.0009) -[2024-07-05 11:35:16,631][38723] Updated weights for policy 0, policy_version 10567 (0.0009) -[2024-07-05 11:35:18,607][38723] Updated weights for policy 0, policy_version 10577 (0.0016) -[2024-07-05 11:35:19,685][25826] Fps is (10 sec: 42598.0, 60 sec: 45055.9, 300 sec: 46486.1). Total num frames: 66682880. Throughput: 0: 11239.9. Samples: 4155156. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:35:19,686][25826] Avg episode reward: [(0, '42.978')] -[2024-07-05 11:35:20,507][38723] Updated weights for policy 0, policy_version 10587 (0.0011) -[2024-07-05 11:35:22,316][38723] Updated weights for policy 0, policy_version 10597 (0.0009) -[2024-07-05 11:35:24,061][38723] Updated weights for policy 0, policy_version 10607 (0.0011) -[2024-07-05 11:35:24,685][25826] Fps is (10 sec: 44236.8, 60 sec: 45056.0, 300 sec: 46458.4). Total num frames: 66912256. Throughput: 0: 11194.8. Samples: 4221884. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:35:24,686][25826] Avg episode reward: [(0, '43.598')] -[2024-07-05 11:35:25,863][38723] Updated weights for policy 0, policy_version 10617 (0.0008) -[2024-07-05 11:35:27,642][38723] Updated weights for policy 0, policy_version 10627 (0.0008) -[2024-07-05 11:35:29,411][38723] Updated weights for policy 0, policy_version 10637 (0.0008) -[2024-07-05 11:35:29,685][25826] Fps is (10 sec: 45875.2, 60 sec: 45056.0, 300 sec: 46458.4). Total num frames: 67141632. Throughput: 0: 11187.3. Samples: 4256248. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:35:29,686][25826] Avg episode reward: [(0, '47.967')] -[2024-07-05 11:35:29,690][38703] Saving new best policy, reward=47.967! -[2024-07-05 11:35:31,215][38723] Updated weights for policy 0, policy_version 10647 (0.0015) -[2024-07-05 11:35:33,009][38723] Updated weights for policy 0, policy_version 10657 (0.0013) -[2024-07-05 11:35:34,685][25826] Fps is (10 sec: 45055.9, 60 sec: 44919.5, 300 sec: 46375.1). Total num frames: 67362816. Throughput: 0: 11209.1. Samples: 4325112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:35:34,686][25826] Avg episode reward: [(0, '43.393')] -[2024-07-05 11:35:34,791][38723] Updated weights for policy 0, policy_version 10667 (0.0011) -[2024-07-05 11:35:36,616][38723] Updated weights for policy 0, policy_version 10677 (0.0008) -[2024-07-05 11:35:38,390][38723] Updated weights for policy 0, policy_version 10687 (0.0009) -[2024-07-05 11:35:39,685][25826] Fps is (10 sec: 45875.7, 60 sec: 45056.0, 300 sec: 46347.3). Total num frames: 67600384. Throughput: 0: 11230.7. Samples: 4393944. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:35:39,686][25826] Avg episode reward: [(0, '42.730')] -[2024-07-05 11:35:40,140][38723] Updated weights for policy 0, policy_version 10697 (0.0008) -[2024-07-05 11:35:41,961][38723] Updated weights for policy 0, policy_version 10707 (0.0009) -[2024-07-05 11:35:43,762][38723] Updated weights for policy 0, policy_version 10717 (0.0010) -[2024-07-05 11:35:44,685][25826] Fps is (10 sec: 46694.2, 60 sec: 45055.9, 300 sec: 46319.5). Total num frames: 67829760. Throughput: 0: 11233.0. Samples: 4428432. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:35:44,686][25826] Avg episode reward: [(0, '45.335')] -[2024-07-05 11:35:45,534][38723] Updated weights for policy 0, policy_version 10727 (0.0008) -[2024-07-05 11:35:47,279][38723] Updated weights for policy 0, policy_version 10737 (0.0007) -[2024-07-05 11:35:49,027][38723] Updated weights for policy 0, policy_version 10747 (0.0008) -[2024-07-05 11:35:49,685][25826] Fps is (10 sec: 45875.2, 60 sec: 45192.6, 300 sec: 46264.0). Total num frames: 68059136. Throughput: 0: 11239.5. Samples: 4497504. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:35:49,686][25826] Avg episode reward: [(0, '46.917')] -[2024-07-05 11:35:50,820][38723] Updated weights for policy 0, policy_version 10757 (0.0010) -[2024-07-05 11:35:52,658][38723] Updated weights for policy 0, policy_version 10767 (0.0008) -[2024-07-05 11:35:54,458][38723] Updated weights for policy 0, policy_version 10777 (0.0011) -[2024-07-05 11:35:54,685][25826] Fps is (10 sec: 45875.3, 60 sec: 45192.7, 300 sec: 46208.4). Total num frames: 68288512. Throughput: 0: 11281.3. Samples: 4565716. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:35:54,686][25826] Avg episode reward: [(0, '46.120')] -[2024-07-05 11:35:56,253][38723] Updated weights for policy 0, policy_version 10787 (0.0009) -[2024-07-05 11:35:58,090][38723] Updated weights for policy 0, policy_version 10797 (0.0008) -[2024-07-05 11:35:59,685][25826] Fps is (10 sec: 45874.8, 60 sec: 45192.5, 300 sec: 46152.9). Total num frames: 68517888. Throughput: 0: 11333.5. Samples: 4600048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:35:59,686][25826] Avg episode reward: [(0, '44.921')] -[2024-07-05 11:35:59,840][38723] Updated weights for policy 0, policy_version 10807 (0.0008) -[2024-07-05 11:36:01,601][38723] Updated weights for policy 0, policy_version 10817 (0.0008) -[2024-07-05 11:36:03,395][38723] Updated weights for policy 0, policy_version 10827 (0.0008) -[2024-07-05 11:36:04,685][25826] Fps is (10 sec: 45875.3, 60 sec: 45192.7, 300 sec: 46097.4). Total num frames: 68747264. Throughput: 0: 11426.8. Samples: 4669360. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:36:04,686][25826] Avg episode reward: [(0, '42.990')] -[2024-07-05 11:36:05,178][38723] Updated weights for policy 0, policy_version 10837 (0.0008) -[2024-07-05 11:36:06,897][38723] Updated weights for policy 0, policy_version 10847 (0.0011) -[2024-07-05 11:36:08,690][38723] Updated weights for policy 0, policy_version 10857 (0.0009) -[2024-07-05 11:36:09,685][25826] Fps is (10 sec: 45874.8, 60 sec: 45328.9, 300 sec: 46041.8). Total num frames: 68976640. Throughput: 0: 11470.5. Samples: 4738056. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:36:09,686][25826] Avg episode reward: [(0, '43.741')] -[2024-07-05 11:36:10,512][38723] Updated weights for policy 0, policy_version 10867 (0.0011) -[2024-07-05 11:36:12,296][38723] Updated weights for policy 0, policy_version 10877 (0.0008) -[2024-07-05 11:36:14,082][38723] Updated weights for policy 0, policy_version 10887 (0.0010) -[2024-07-05 11:36:14,685][25826] Fps is (10 sec: 45875.3, 60 sec: 45602.1, 300 sec: 45986.3). Total num frames: 69206016. Throughput: 0: 11470.6. Samples: 4772424. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:36:14,686][25826] Avg episode reward: [(0, '45.521')] -[2024-07-05 11:36:15,914][38723] Updated weights for policy 0, policy_version 10897 (0.0008) -[2024-07-05 11:36:17,664][38723] Updated weights for policy 0, policy_version 10907 (0.0009) -[2024-07-05 11:36:19,451][38723] Updated weights for policy 0, policy_version 10917 (0.0007) -[2024-07-05 11:36:19,685][25826] Fps is (10 sec: 45874.7, 60 sec: 45875.1, 300 sec: 45930.7). Total num frames: 69435392. Throughput: 0: 11471.9. Samples: 4841348. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:36:19,686][25826] Avg episode reward: [(0, '45.446')] -[2024-07-05 11:36:21,241][38723] Updated weights for policy 0, policy_version 10927 (0.0011) -[2024-07-05 11:36:23,014][38723] Updated weights for policy 0, policy_version 10937 (0.0011) -[2024-07-05 11:36:24,685][25826] Fps is (10 sec: 45875.0, 60 sec: 45875.2, 300 sec: 45903.0). Total num frames: 69664768. Throughput: 0: 11471.2. Samples: 4910148. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:36:24,686][25826] Avg episode reward: [(0, '47.741')] -[2024-07-05 11:36:24,783][38723] Updated weights for policy 0, policy_version 10947 (0.0009) -[2024-07-05 11:36:26,554][38723] Updated weights for policy 0, policy_version 10957 (0.0010) -[2024-07-05 11:36:28,357][38723] Updated weights for policy 0, policy_version 10967 (0.0010) -[2024-07-05 11:36:29,685][25826] Fps is (10 sec: 45876.2, 60 sec: 45875.2, 300 sec: 45847.4). Total num frames: 69894144. Throughput: 0: 11474.7. Samples: 4944792. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:36:29,686][25826] Avg episode reward: [(0, '43.307')] -[2024-07-05 11:36:30,129][38723] Updated weights for policy 0, policy_version 10977 (0.0009) -[2024-07-05 11:36:31,883][38723] Updated weights for policy 0, policy_version 10987 (0.0008) -[2024-07-05 11:36:32,080][38703] Stopping Batcher_0... -[2024-07-05 11:36:32,080][38703] Loop batcher_evt_loop terminating... -[2024-07-05 11:36:32,083][25826] Component Batcher_0 stopped! -[2024-07-05 11:36:32,088][38703] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010988_70008832.pth... -[2024-07-05 11:36:32,127][38723] Weights refcount: 2 0 -[2024-07-05 11:36:32,129][38723] Stopping InferenceWorker_p0-w0... -[2024-07-05 11:36:32,130][38723] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 11:36:32,133][25826] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 11:36:32,150][38753] Stopping RolloutWorker_w15... -[2024-07-05 11:36:32,150][38753] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 11:36:32,151][38749] Stopping RolloutWorker_w10... -[2024-07-05 11:36:32,152][38749] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 11:36:32,152][38727] Stopping RolloutWorker_w2... -[2024-07-05 11:36:32,152][38725] Stopping RolloutWorker_w1... -[2024-07-05 11:36:32,152][38754] Stopping RolloutWorker_w14... -[2024-07-05 11:36:32,153][38754] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 11:36:32,152][38727] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 11:36:32,153][38725] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 11:36:32,150][25826] Component RolloutWorker_w4 stopped! -[2024-07-05 11:36:32,150][38728] Stopping RolloutWorker_w4... -[2024-07-05 11:36:32,157][38728] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 11:36:32,156][25826] Component RolloutWorker_w15 stopped! -[2024-07-05 11:36:32,158][38750] Stopping RolloutWorker_w11... -[2024-07-05 11:36:32,158][38750] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 11:36:32,158][38751] Stopping RolloutWorker_w13... -[2024-07-05 11:36:32,158][38748] Stopping RolloutWorker_w8... -[2024-07-05 11:36:32,159][38751] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 11:36:32,159][38748] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 11:36:32,158][25826] Component RolloutWorker_w10 stopped! -[2024-07-05 11:36:32,160][25826] Component RolloutWorker_w14 stopped! -[2024-07-05 11:36:32,161][25826] Component RolloutWorker_w2 stopped! -[2024-07-05 11:36:32,162][25826] Component RolloutWorker_w1 stopped! -[2024-07-05 11:36:32,162][38726] Stopping RolloutWorker_w3... -[2024-07-05 11:36:32,163][25826] Component RolloutWorker_w11 stopped! -[2024-07-05 11:36:32,164][25826] Component RolloutWorker_w13 stopped! -[2024-07-05 11:36:32,165][38732] Stopping RolloutWorker_w9... -[2024-07-05 11:36:32,165][38730] Stopping RolloutWorker_w5... -[2024-07-05 11:36:32,165][25826] Component RolloutWorker_w8 stopped! -[2024-07-05 11:36:32,165][38732] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 11:36:32,165][25826] Component RolloutWorker_w3 stopped! -[2024-07-05 11:36:32,166][25826] Component RolloutWorker_w0 stopped! -[2024-07-05 11:36:32,167][38703] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000009861_60776448.pth -[2024-07-05 11:36:32,164][38724] Stopping RolloutWorker_w0... -[2024-07-05 11:36:32,167][25826] Component RolloutWorker_w6 stopped! -[2024-07-05 11:36:32,170][25826] Component RolloutWorker_w9 stopped! -[2024-07-05 11:36:32,170][38731] Stopping RolloutWorker_w7... -[2024-07-05 11:36:32,170][25826] Component RolloutWorker_w5 stopped! -[2024-07-05 11:36:32,169][38726] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 11:36:32,171][38731] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 11:36:32,171][25826] Component RolloutWorker_w7 stopped! -[2024-07-05 11:36:32,171][38724] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 11:36:32,171][38730] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 11:36:32,175][38703] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010988_70008832.pth... -[2024-07-05 11:36:32,192][38752] Stopping RolloutWorker_w12... -[2024-07-05 11:36:32,164][38729] Stopping RolloutWorker_w6... -[2024-07-05 11:36:32,193][38752] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 11:36:32,192][25826] Component RolloutWorker_w12 stopped! -[2024-07-05 11:36:32,198][38729] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 11:36:32,287][38703] Stopping LearnerWorker_p0... -[2024-07-05 11:36:32,287][38703] Loop learner_proc0_evt_loop terminating... -[2024-07-05 11:36:32,287][25826] Component LearnerWorker_p0 stopped! -[2024-07-05 11:36:32,288][25826] Waiting for process learner_proc0 to stop... -[2024-07-05 11:36:33,831][25826] Waiting for process inference_proc0-0 to join... -[2024-07-05 11:36:33,832][25826] Waiting for process rollout_proc0 to join... -[2024-07-05 11:36:33,832][25826] Waiting for process rollout_proc1 to join... -[2024-07-05 11:36:33,832][25826] Waiting for process rollout_proc2 to join... -[2024-07-05 11:36:33,833][25826] Waiting for process rollout_proc3 to join... -[2024-07-05 11:36:33,833][25826] Waiting for process rollout_proc4 to join... -[2024-07-05 11:36:33,833][25826] Waiting for process rollout_proc5 to join... -[2024-07-05 11:36:33,834][25826] Waiting for process rollout_proc6 to join... -[2024-07-05 11:36:33,834][25826] Waiting for process rollout_proc7 to join... -[2024-07-05 11:36:33,834][25826] Waiting for process rollout_proc8 to join... -[2024-07-05 11:36:33,835][25826] Waiting for process rollout_proc9 to join... -[2024-07-05 11:36:33,835][25826] Waiting for process rollout_proc10 to join... -[2024-07-05 11:36:33,835][25826] Waiting for process rollout_proc11 to join... -[2024-07-05 11:36:33,836][25826] Waiting for process rollout_proc12 to join... -[2024-07-05 11:36:33,836][25826] Waiting for process rollout_proc13 to join... -[2024-07-05 11:36:33,836][25826] Waiting for process rollout_proc14 to join... -[2024-07-05 11:36:33,836][25826] Waiting for process rollout_proc15 to join... -[2024-07-05 11:36:33,837][25826] Batcher 0 profile tree view: -batching: 31.1755, releasing_batches: 0.0581 -[2024-07-05 11:36:33,837][25826] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0000 - wait_policy_total: 16.2722 -update_model: 6.8049 - weight_update: 0.0008 -one_step: 0.0025 - handle_policy_step: 395.7835 - deserialize: 32.4813, stack: 2.2534, obs_to_device_normalize: 95.0526, forward: 183.4169, send_messages: 19.8007 - prepare_outputs: 49.0704 - to_cpu: 29.5464 -[2024-07-05 11:36:33,837][25826] Learner 0 profile tree view: -misc: 0.0115, prepare_batch: 40.3220 -train: 88.0144 - epoch_init: 0.0079, minibatch_init: 0.0110, losses_postprocess: 0.5368, kl_divergence: 0.5839, after_optimizer: 0.5923 - calculate_losses: 31.1876 - losses_init: 0.0046, forward_head: 1.4184, bptt_initial: 24.2470, tail: 1.1510, advantages_returns: 0.3089, losses: 1.7368 - bptt: 1.9692 - bptt_forward_core: 1.8773 - update: 54.2590 - clip: 1.6949 -[2024-07-05 11:36:33,838][25826] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.1824, enqueue_policy_requests: 12.2761, env_step: 200.9894, overhead: 21.7944, complete_rollouts: 0.4379 -save_policy_outputs: 14.8675 - split_output_tensors: 6.9744 -[2024-07-05 11:36:33,838][25826] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.1915, enqueue_policy_requests: 13.0398, env_step: 207.7644, overhead: 23.0416, complete_rollouts: 0.4270 -save_policy_outputs: 15.0362 - split_output_tensors: 6.9323 -[2024-07-05 11:36:33,838][25826] Loop Runner_EvtLoop terminating... -[2024-07-05 11:36:33,839][25826] Runner profile tree view: -main_loop: 441.0909 -[2024-07-05 11:36:33,839][25826] Collected {0: 70008832}, FPS: 45334.6 -[2024-07-05 11:38:02,399][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:38:02,400][25826] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 11:38:02,401][25826] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 11:38:02,401][25826] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 11:38:02,401][25826] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:38:02,401][25826] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 11:38:02,402][25826] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:38:02,402][25826] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 11:38:02,403][25826] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 11:38:02,403][25826] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 11:38:02,403][25826] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 11:38:02,403][25826] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 11:38:02,404][25826] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 11:38:02,404][25826] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 11:38:02,404][25826] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 11:38:02,421][25826] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:38:02,423][25826] RunningMeanStd input shape: (1,) -[2024-07-05 11:38:02,434][25826] ConvEncoder: input_channels=3 -[2024-07-05 11:38:02,467][25826] Conv encoder output size: 512 -[2024-07-05 11:38:02,468][25826] Policy head output size: 512 -[2024-07-05 11:38:02,497][25826] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010988_70008832.pth... -[2024-07-05 11:38:02,838][25826] Num frames 100... -[2024-07-05 11:38:02,901][25826] Num frames 200... -[2024-07-05 11:38:02,964][25826] Num frames 300... -[2024-07-05 11:38:03,029][25826] Num frames 400... -[2024-07-05 11:38:03,090][25826] Num frames 500... -[2024-07-05 11:38:03,149][25826] Num frames 600... -[2024-07-05 11:38:03,208][25826] Num frames 700... -[2024-07-05 11:38:03,270][25826] Num frames 800... -[2024-07-05 11:38:03,328][25826] Num frames 900... -[2024-07-05 11:38:03,386][25826] Num frames 1000... -[2024-07-05 11:38:03,456][25826] Num frames 1100... -[2024-07-05 11:38:03,517][25826] Num frames 1200... -[2024-07-05 11:38:03,575][25826] Num frames 1300... -[2024-07-05 11:38:03,636][25826] Num frames 1400... -[2024-07-05 11:38:03,698][25826] Num frames 1500... -[2024-07-05 11:38:03,763][25826] Num frames 1600... -[2024-07-05 11:38:03,815][25826] Avg episode rewards: #0: 36.000, true rewards: #0: 16.000 -[2024-07-05 11:38:03,816][25826] Avg episode reward: 36.000, avg true_objective: 16.000 -[2024-07-05 11:38:03,885][25826] Num frames 1700... -[2024-07-05 11:38:03,949][25826] Num frames 1800... -[2024-07-05 11:38:04,011][25826] Num frames 1900... -[2024-07-05 11:38:04,073][25826] Num frames 2000... -[2024-07-05 11:38:04,138][25826] Num frames 2100... -[2024-07-05 11:38:04,201][25826] Num frames 2200... -[2024-07-05 11:38:04,264][25826] Num frames 2300... -[2024-07-05 11:38:04,326][25826] Num frames 2400... -[2024-07-05 11:38:04,390][25826] Num frames 2500... -[2024-07-05 11:38:04,450][25826] Num frames 2600... -[2024-07-05 11:38:04,512][25826] Num frames 2700... -[2024-07-05 11:38:04,576][25826] Num frames 2800... -[2024-07-05 11:38:04,641][25826] Num frames 2900... -[2024-07-05 11:38:04,703][25826] Num frames 3000... -[2024-07-05 11:38:04,761][25826] Num frames 3100... -[2024-07-05 11:38:04,822][25826] Num frames 3200... -[2024-07-05 11:38:04,885][25826] Num frames 3300... -[2024-07-05 11:38:04,945][25826] Num frames 3400... -[2024-07-05 11:38:05,003][25826] Num frames 3500... -[2024-07-05 11:38:05,065][25826] Num frames 3600... -[2024-07-05 11:38:05,130][25826] Num frames 3700... -[2024-07-05 11:38:05,181][25826] Avg episode rewards: #0: 47.499, true rewards: #0: 18.500 -[2024-07-05 11:38:05,182][25826] Avg episode reward: 47.499, avg true_objective: 18.500 -[2024-07-05 11:38:05,246][25826] Num frames 3800... -[2024-07-05 11:38:05,310][25826] Num frames 3900... -[2024-07-05 11:38:05,370][25826] Num frames 4000... -[2024-07-05 11:38:05,429][25826] Num frames 4100... -[2024-07-05 11:38:05,492][25826] Avg episode rewards: #0: 34.720, true rewards: #0: 13.720 -[2024-07-05 11:38:05,493][25826] Avg episode reward: 34.720, avg true_objective: 13.720 -[2024-07-05 11:38:05,549][25826] Num frames 4200... -[2024-07-05 11:38:05,610][25826] Num frames 4300... -[2024-07-05 11:38:05,668][25826] Num frames 4400... -[2024-07-05 11:38:05,729][25826] Num frames 4500... -[2024-07-05 11:38:05,790][25826] Num frames 4600... -[2024-07-05 11:38:05,855][25826] Num frames 4700... -[2024-07-05 11:38:05,920][25826] Num frames 4800... -[2024-07-05 11:38:05,981][25826] Num frames 4900... -[2024-07-05 11:38:06,041][25826] Num frames 5000... -[2024-07-05 11:38:06,104][25826] Num frames 5100... -[2024-07-05 11:38:06,167][25826] Num frames 5200... -[2024-07-05 11:38:06,229][25826] Num frames 5300... -[2024-07-05 11:38:06,291][25826] Num frames 5400... -[2024-07-05 11:38:06,360][25826] Num frames 5500... -[2024-07-05 11:38:06,421][25826] Num frames 5600... -[2024-07-05 11:38:06,480][25826] Num frames 5700... -[2024-07-05 11:38:06,542][25826] Num frames 5800... -[2024-07-05 11:38:06,601][25826] Num frames 5900... -[2024-07-05 11:38:06,661][25826] Num frames 6000... -[2024-07-05 11:38:06,720][25826] Num frames 6100... -[2024-07-05 11:38:06,780][25826] Num frames 6200... -[2024-07-05 11:38:06,844][25826] Avg episode rewards: #0: 40.789, true rewards: #0: 15.540 -[2024-07-05 11:38:06,845][25826] Avg episode reward: 40.789, avg true_objective: 15.540 -[2024-07-05 11:38:06,898][25826] Num frames 6300... -[2024-07-05 11:38:06,958][25826] Num frames 6400... -[2024-07-05 11:38:07,019][25826] Num frames 6500... -[2024-07-05 11:38:07,081][25826] Num frames 6600... -[2024-07-05 11:38:07,141][25826] Num frames 6700... -[2024-07-05 11:38:07,199][25826] Num frames 6800... -[2024-07-05 11:38:07,260][25826] Num frames 6900... -[2024-07-05 11:38:07,319][25826] Num frames 7000... -[2024-07-05 11:38:07,378][25826] Num frames 7100... -[2024-07-05 11:38:07,440][25826] Num frames 7200... -[2024-07-05 11:38:07,501][25826] Num frames 7300... -[2024-07-05 11:38:07,563][25826] Num frames 7400... -[2024-07-05 11:38:07,624][25826] Num frames 7500... -[2024-07-05 11:38:07,686][25826] Num frames 7600... -[2024-07-05 11:38:07,746][25826] Num frames 7700... -[2024-07-05 11:38:07,805][25826] Num frames 7800... -[2024-07-05 11:38:07,868][25826] Num frames 7900... -[2024-07-05 11:38:07,930][25826] Num frames 8000... -[2024-07-05 11:38:07,990][25826] Num frames 8100... -[2024-07-05 11:38:08,050][25826] Num frames 8200... -[2024-07-05 11:38:08,110][25826] Num frames 8300... -[2024-07-05 11:38:08,172][25826] Avg episode rewards: #0: 44.231, true rewards: #0: 16.632 -[2024-07-05 11:38:08,174][25826] Avg episode reward: 44.231, avg true_objective: 16.632 -[2024-07-05 11:38:08,230][25826] Num frames 8400... -[2024-07-05 11:38:08,292][25826] Num frames 8500... -[2024-07-05 11:38:08,363][25826] Num frames 8600... -[2024-07-05 11:38:08,422][25826] Num frames 8700... -[2024-07-05 11:38:08,481][25826] Num frames 8800... -[2024-07-05 11:38:08,542][25826] Num frames 8900... -[2024-07-05 11:38:08,603][25826] Num frames 9000... -[2024-07-05 11:38:08,667][25826] Num frames 9100... -[2024-07-05 11:38:08,728][25826] Num frames 9200... -[2024-07-05 11:38:08,789][25826] Num frames 9300... -[2024-07-05 11:38:08,852][25826] Num frames 9400... -[2024-07-05 11:38:08,914][25826] Num frames 9500... -[2024-07-05 11:38:08,965][25826] Avg episode rewards: #0: 40.999, true rewards: #0: 15.833 -[2024-07-05 11:38:08,967][25826] Avg episode reward: 40.999, avg true_objective: 15.833 -[2024-07-05 11:38:09,030][25826] Num frames 9600... -[2024-07-05 11:38:09,089][25826] Num frames 9700... -[2024-07-05 11:38:09,148][25826] Num frames 9800... -[2024-07-05 11:38:09,208][25826] Num frames 9900... -[2024-07-05 11:38:09,267][25826] Num frames 10000... -[2024-07-05 11:38:09,326][25826] Num frames 10100... -[2024-07-05 11:38:09,398][25826] Num frames 10200... -[2024-07-05 11:38:09,463][25826] Num frames 10300... -[2024-07-05 11:38:09,521][25826] Num frames 10400... -[2024-07-05 11:38:09,590][25826] Num frames 10500... -[2024-07-05 11:38:09,653][25826] Num frames 10600... -[2024-07-05 11:38:09,712][25826] Num frames 10700... -[2024-07-05 11:38:09,771][25826] Num frames 10800... -[2024-07-05 11:38:09,834][25826] Num frames 10900... -[2024-07-05 11:38:09,893][25826] Num frames 11000... -[2024-07-05 11:38:09,956][25826] Num frames 11100... -[2024-07-05 11:38:10,021][25826] Num frames 11200... -[2024-07-05 11:38:10,081][25826] Num frames 11300... -[2024-07-05 11:38:10,143][25826] Num frames 11400... -[2024-07-05 11:38:10,229][25826] Avg episode rewards: #0: 43.359, true rewards: #0: 16.360 -[2024-07-05 11:38:10,230][25826] Avg episode reward: 43.359, avg true_objective: 16.360 -[2024-07-05 11:38:10,268][25826] Num frames 11500... -[2024-07-05 11:38:10,327][25826] Num frames 11600... -[2024-07-05 11:38:10,389][25826] Num frames 11700... -[2024-07-05 11:38:10,451][25826] Num frames 11800... -[2024-07-05 11:38:10,510][25826] Num frames 11900... -[2024-07-05 11:38:10,571][25826] Num frames 12000... -[2024-07-05 11:38:10,630][25826] Num frames 12100... -[2024-07-05 11:38:10,689][25826] Num frames 12200... -[2024-07-05 11:38:10,750][25826] Num frames 12300... -[2024-07-05 11:38:10,813][25826] Num frames 12400... -[2024-07-05 11:38:10,872][25826] Num frames 12500... -[2024-07-05 11:38:10,932][25826] Num frames 12600... -[2024-07-05 11:38:10,994][25826] Num frames 12700... -[2024-07-05 11:38:11,055][25826] Num frames 12800... -[2024-07-05 11:38:11,115][25826] Num frames 12900... -[2024-07-05 11:38:11,176][25826] Num frames 13000... -[2024-07-05 11:38:11,239][25826] Num frames 13100... -[2024-07-05 11:38:11,300][25826] Num frames 13200... -[2024-07-05 11:38:11,361][25826] Num frames 13300... -[2024-07-05 11:38:11,422][25826] Num frames 13400... -[2024-07-05 11:38:11,482][25826] Num frames 13500... -[2024-07-05 11:38:11,567][25826] Avg episode rewards: #0: 45.814, true rewards: #0: 16.940 -[2024-07-05 11:38:11,568][25826] Avg episode reward: 45.814, avg true_objective: 16.940 -[2024-07-05 11:38:11,603][25826] Num frames 13600... -[2024-07-05 11:38:11,663][25826] Num frames 13700... -[2024-07-05 11:38:11,724][25826] Num frames 13800... -[2024-07-05 11:38:11,782][25826] Num frames 13900... -[2024-07-05 11:38:11,844][25826] Num frames 14000... -[2024-07-05 11:38:11,903][25826] Num frames 14100... -[2024-07-05 11:38:11,963][25826] Num frames 14200... -[2024-07-05 11:38:12,055][25826] Avg episode rewards: #0: 42.624, true rewards: #0: 15.847 -[2024-07-05 11:38:12,057][25826] Avg episode reward: 42.624, avg true_objective: 15.847 -[2024-07-05 11:38:12,084][25826] Num frames 14300... -[2024-07-05 11:38:12,147][25826] Num frames 14400... -[2024-07-05 11:38:12,208][25826] Num frames 14500... -[2024-07-05 11:38:12,270][25826] Num frames 14600... -[2024-07-05 11:38:12,331][25826] Num frames 14700... -[2024-07-05 11:38:12,392][25826] Num frames 14800... -[2024-07-05 11:38:12,452][25826] Num frames 14900... -[2024-07-05 11:38:12,521][25826] Num frames 15000... -[2024-07-05 11:38:12,584][25826] Num frames 15100... -[2024-07-05 11:38:12,652][25826] Num frames 15200... -[2024-07-05 11:38:12,713][25826] Num frames 15300... -[2024-07-05 11:38:12,772][25826] Num frames 15400... -[2024-07-05 11:38:12,832][25826] Num frames 15500... -[2024-07-05 11:38:12,893][25826] Num frames 15600... -[2024-07-05 11:38:12,954][25826] Num frames 15700... -[2024-07-05 11:38:13,014][25826] Num frames 15800... -[2024-07-05 11:38:13,077][25826] Num frames 15900... -[2024-07-05 11:38:13,141][25826] Num frames 16000... -[2024-07-05 11:38:13,201][25826] Num frames 16100... -[2024-07-05 11:38:13,263][25826] Num frames 16200... -[2024-07-05 11:38:13,325][25826] Num frames 16300... -[2024-07-05 11:38:13,418][25826] Avg episode rewards: #0: 44.361, true rewards: #0: 16.362 -[2024-07-05 11:38:13,419][25826] Avg episode reward: 44.361, avg true_objective: 16.362 -[2024-07-05 11:38:30,719][25826] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 11:39:29,353][25826] Environment doom_basic already registered, overwriting... -[2024-07-05 11:39:29,355][25826] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 11:39:29,356][25826] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 11:39:29,356][25826] Environment doom_dm already registered, overwriting... -[2024-07-05 11:39:29,356][25826] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 11:39:29,357][25826] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 11:39:29,357][25826] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 11:39:29,357][25826] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 11:39:29,357][25826] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 11:39:29,358][25826] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 11:39:29,358][25826] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 11:39:29,358][25826] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 11:39:29,359][25826] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 11:39:29,359][25826] Environment doom_battle already registered, overwriting... -[2024-07-05 11:39:29,360][25826] Environment doom_battle2 already registered, overwriting... -[2024-07-05 11:39:29,360][25826] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 11:39:29,360][25826] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 11:39:29,361][25826] Environment doom_duel already registered, overwriting... -[2024-07-05 11:39:29,361][25826] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 11:39:29,361][25826] Environment doom_benchmark already registered, overwriting... -[2024-07-05 11:39:29,362][25826] register_encoder_factory: -[2024-07-05 11:39:29,367][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:39:29,367][25826] Overriding arg 'train_for_env_steps' with value 100000000 passed from command line -[2024-07-05 11:39:29,371][25826] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 11:39:29,372][25826] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 11:39:29,372][25826] Weights and Biases integration disabled -[2024-07-05 11:39:29,374][25826] Environment var CUDA_VISIBLE_DEVICES is 0 - -[2024-07-05 11:39:32,393][25826] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir -restart_behavior=resume -device=gpu -seed=200 -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=100000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-07-05 11:39:32,402][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 11:39:32,404][25826] Rollout worker 0 uses device cpu -[2024-07-05 11:39:32,404][25826] Rollout worker 1 uses device cpu -[2024-07-05 11:39:32,404][25826] Rollout worker 2 uses device cpu -[2024-07-05 11:39:32,404][25826] Rollout worker 3 uses device cpu -[2024-07-05 11:39:32,405][25826] Rollout worker 4 uses device cpu -[2024-07-05 11:39:32,405][25826] Rollout worker 5 uses device cpu -[2024-07-05 11:39:32,406][25826] Rollout worker 6 uses device cpu -[2024-07-05 11:39:32,406][25826] Rollout worker 7 uses device cpu -[2024-07-05 11:39:32,406][25826] Rollout worker 8 uses device cpu -[2024-07-05 11:39:32,407][25826] Rollout worker 9 uses device cpu -[2024-07-05 11:39:32,407][25826] Rollout worker 10 uses device cpu -[2024-07-05 11:39:32,408][25826] Rollout worker 11 uses device cpu -[2024-07-05 11:39:32,408][25826] Rollout worker 12 uses device cpu -[2024-07-05 11:39:32,408][25826] Rollout worker 13 uses device cpu -[2024-07-05 11:39:32,409][25826] Rollout worker 14 uses device cpu -[2024-07-05 11:39:32,409][25826] Rollout worker 15 uses device cpu -[2024-07-05 11:39:32,501][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:39:32,502][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 11:39:32,554][25826] Starting all processes... -[2024-07-05 11:39:32,555][25826] Starting process learner_proc0 -[2024-07-05 11:39:32,604][25826] Starting all processes... -[2024-07-05 11:39:32,607][25826] Starting process inference_proc0-0 -[2024-07-05 11:39:32,608][25826] Starting process rollout_proc0 -[2024-07-05 11:39:32,609][25826] Starting process rollout_proc1 -[2024-07-05 11:39:32,609][25826] Starting process rollout_proc2 -[2024-07-05 11:39:32,609][25826] Starting process rollout_proc3 -[2024-07-05 11:39:32,609][25826] Starting process rollout_proc4 -[2024-07-05 11:39:32,611][25826] Starting process rollout_proc5 -[2024-07-05 11:39:32,611][25826] Starting process rollout_proc6 -[2024-07-05 11:39:32,614][25826] Starting process rollout_proc7 -[2024-07-05 11:39:32,614][25826] Starting process rollout_proc8 -[2024-07-05 11:39:32,616][25826] Starting process rollout_proc9 -[2024-07-05 11:39:32,616][25826] Starting process rollout_proc10 -[2024-07-05 11:39:32,617][25826] Starting process rollout_proc11 -[2024-07-05 11:39:32,619][25826] Starting process rollout_proc12 -[2024-07-05 11:39:32,619][25826] Starting process rollout_proc13 -[2024-07-05 11:39:32,621][25826] Starting process rollout_proc14 -[2024-07-05 11:39:32,637][25826] Starting process rollout_proc15 -[2024-07-05 11:39:36,706][43183] Worker 1 uses CPU cores [1] -[2024-07-05 11:39:36,740][43182] Worker 0 uses CPU cores [0] -[2024-07-05 11:39:36,756][43187] Worker 5 uses CPU cores [5] -[2024-07-05 11:39:36,760][43207] Worker 10 uses CPU cores [10] -[2024-07-05 11:39:36,786][43209] Worker 12 uses CPU cores [12] -[2024-07-05 11:39:36,804][43208] Worker 11 uses CPU cores [11] -[2024-07-05 11:39:36,836][43190] Worker 6 uses CPU cores [6] -[2024-07-05 11:39:36,850][43184] Worker 2 uses CPU cores [2] -[2024-07-05 11:39:36,869][43188] Worker 7 uses CPU cores [7] -[2024-07-05 11:39:36,876][43211] Worker 15 uses CPU cores [15] -[2024-07-05 11:39:36,880][43186] Worker 4 uses CPU cores [4] -[2024-07-05 11:39:36,892][43185] Worker 3 uses CPU cores [3] -[2024-07-05 11:39:36,939][43161] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:39:36,939][43161] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 11:39:36,992][43161] Num visible devices: 1 -[2024-07-05 11:39:37,019][43212] Worker 14 uses CPU cores [14] -[2024-07-05 11:39:37,025][43161] Setting fixed seed 200 -[2024-07-05 11:39:37,028][43161] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:39:37,028][43161] Initializing actor-critic model on device cuda:0 -[2024-07-05 11:39:37,029][43161] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:39:37,030][43161] RunningMeanStd input shape: (1,) -[2024-07-05 11:39:37,034][43206] Worker 9 uses CPU cores [9] -[2024-07-05 11:39:37,038][43161] ConvEncoder: input_channels=3 -[2024-07-05 11:39:37,066][43181] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:39:37,066][43181] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 11:39:37,092][43210] Worker 13 uses CPU cores [13] -[2024-07-05 11:39:37,110][43189] Worker 8 uses CPU cores [8] -[2024-07-05 11:39:37,115][43181] Num visible devices: 1 -[2024-07-05 11:39:37,120][43161] Conv encoder output size: 512 -[2024-07-05 11:39:37,120][43161] Policy head output size: 512 -[2024-07-05 11:39:37,135][43161] Created Actor Critic model with architecture: -[2024-07-05 11:39:37,135][43161] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) - ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) - ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 11:39:37,263][43161] Using optimizer -[2024-07-05 11:39:37,891][43161] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010988_70008832.pth... -[2024-07-05 11:39:37,911][43161] Loading model from checkpoint -[2024-07-05 11:39:37,912][43161] Loaded experiment state at self.train_step=10988, self.env_steps=70008832 -[2024-07-05 11:39:37,912][43161] Initialized policy 0 weights for model version 10988 -[2024-07-05 11:39:37,913][43161] LearnerWorker_p0 finished initialization! -[2024-07-05 11:39:37,914][43161] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:39:37,980][43181] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:39:37,981][43181] RunningMeanStd input shape: (1,) -[2024-07-05 11:39:37,988][43181] ConvEncoder: input_channels=3 -[2024-07-05 11:39:38,043][43181] Conv encoder output size: 512 -[2024-07-05 11:39:38,043][43181] Policy head output size: 512 -[2024-07-05 11:39:38,074][25826] Inference worker 0-0 is ready! -[2024-07-05 11:39:38,075][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 11:39:38,121][43209] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,122][43212] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,122][43183] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,124][43190] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,125][43182] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,126][43186] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,126][43211] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,128][43185] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,130][43187] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,131][43184] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,131][43207] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,138][43189] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,146][43208] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,152][43188] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,154][43210] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,157][43206] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:39:38,708][43182] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,726][43212] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,727][43209] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,727][43190] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,727][43207] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,728][43189] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,729][43185] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,729][43183] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,874][43211] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,881][43190] Decorrelating experience for 32 frames... -[2024-07-05 11:39:38,882][43209] Decorrelating experience for 32 frames... -[2024-07-05 11:39:38,923][43182] Decorrelating experience for 32 frames... -[2024-07-05 11:39:38,933][43187] Decorrelating experience for 0 frames... -[2024-07-05 11:39:38,942][43183] Decorrelating experience for 32 frames... -[2024-07-05 11:39:38,943][43185] Decorrelating experience for 32 frames... -[2024-07-05 11:39:38,944][43184] Decorrelating experience for 0 frames... -[2024-07-05 11:39:39,051][43190] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,064][43207] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,076][43186] Decorrelating experience for 0 frames... -[2024-07-05 11:39:39,091][43212] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,110][43210] Decorrelating experience for 0 frames... -[2024-07-05 11:39:39,113][43183] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,137][43189] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,163][43208] Decorrelating experience for 0 frames... -[2024-07-05 11:39:39,178][43206] Decorrelating experience for 0 frames... -[2024-07-05 11:39:39,250][43209] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,251][43184] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,261][43188] Decorrelating experience for 0 frames... -[2024-07-05 11:39:39,294][43182] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,297][43211] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,312][43189] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,362][43190] Decorrelating experience for 96 frames... -[2024-07-05 11:39:39,374][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 70008832. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 11:39:39,428][43212] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,461][43184] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,472][43182] Decorrelating experience for 96 frames... -[2024-07-05 11:39:39,485][43206] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,512][43185] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,526][43188] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,554][43209] Decorrelating experience for 96 frames... -[2024-07-05 11:39:39,612][43212] Decorrelating experience for 96 frames... -[2024-07-05 11:39:39,652][43187] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,653][43190] Decorrelating experience for 128 frames... -[2024-07-05 11:39:39,685][43210] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,699][43189] Decorrelating experience for 96 frames... -[2024-07-05 11:39:39,701][43207] Decorrelating experience for 64 frames... -[2024-07-05 11:39:39,726][43182] Decorrelating experience for 128 frames... -[2024-07-05 11:39:39,841][43209] Decorrelating experience for 128 frames... -[2024-07-05 11:39:39,851][43212] Decorrelating experience for 128 frames... -[2024-07-05 11:39:39,878][43208] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,880][43185] Decorrelating experience for 96 frames... -[2024-07-05 11:39:39,891][43186] Decorrelating experience for 32 frames... -[2024-07-05 11:39:39,960][43184] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,050][43188] Decorrelating experience for 64 frames... -[2024-07-05 11:39:40,070][43211] Decorrelating experience for 64 frames... -[2024-07-05 11:39:40,084][43207] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,105][43208] Decorrelating experience for 64 frames... -[2024-07-05 11:39:40,174][43185] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,212][43186] Decorrelating experience for 64 frames... -[2024-07-05 11:39:40,242][43211] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,243][43189] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,248][43183] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,344][43207] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,353][43190] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,405][43184] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,435][43187] Decorrelating experience for 64 frames... -[2024-07-05 11:39:40,444][43188] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,449][43185] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,449][43189] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,460][43182] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,521][43210] Decorrelating experience for 64 frames... -[2024-07-05 11:39:40,565][43207] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,608][43212] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,625][43190] Decorrelating experience for 192 frames... -[2024-07-05 11:39:40,679][43183] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,686][43208] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,703][43210] Decorrelating experience for 96 frames... -[2024-07-05 11:39:40,747][43182] Decorrelating experience for 192 frames... -[2024-07-05 11:39:40,787][43189] Decorrelating experience for 192 frames... -[2024-07-05 11:39:40,814][43211] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,894][43188] Decorrelating experience for 128 frames... -[2024-07-05 11:39:40,894][43184] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,909][43209] Decorrelating experience for 160 frames... -[2024-07-05 11:39:40,925][43183] Decorrelating experience for 160 frames... -[2024-07-05 11:39:41,003][43207] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,004][43206] Decorrelating experience for 64 frames... -[2024-07-05 11:39:41,020][43187] Decorrelating experience for 96 frames... -[2024-07-05 11:39:41,038][43210] Decorrelating experience for 128 frames... -[2024-07-05 11:39:41,082][43189] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,083][43211] Decorrelating experience for 160 frames... -[2024-07-05 11:39:41,159][43185] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,168][43184] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,231][43183] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,243][43212] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,245][43209] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,273][43188] Decorrelating experience for 160 frames... -[2024-07-05 11:39:41,308][43182] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,357][43206] Decorrelating experience for 96 frames... -[2024-07-05 11:39:41,375][43211] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,428][43187] Decorrelating experience for 128 frames... -[2024-07-05 11:39:41,481][43186] Decorrelating experience for 96 frames... -[2024-07-05 11:39:41,524][43183] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,526][43210] Decorrelating experience for 160 frames... -[2024-07-05 11:39:41,549][43212] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,552][43209] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,565][43208] Decorrelating experience for 128 frames... -[2024-07-05 11:39:41,730][43187] Decorrelating experience for 160 frames... -[2024-07-05 11:39:41,775][43190] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,797][43188] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,798][43206] Decorrelating experience for 128 frames... -[2024-07-05 11:39:41,809][43185] Decorrelating experience for 224 frames... -[2024-07-05 11:39:41,847][43210] Decorrelating experience for 192 frames... -[2024-07-05 11:39:41,937][43186] Decorrelating experience for 128 frames... -[2024-07-05 11:39:42,005][43207] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,054][43211] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,105][43188] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,117][43208] Decorrelating experience for 160 frames... -[2024-07-05 11:39:42,170][43187] Decorrelating experience for 192 frames... -[2024-07-05 11:39:42,227][43206] Decorrelating experience for 160 frames... -[2024-07-05 11:39:42,292][43210] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,393][43208] Decorrelating experience for 192 frames... -[2024-07-05 11:39:42,411][43186] Decorrelating experience for 160 frames... -[2024-07-05 11:39:42,510][43184] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,592][43206] Decorrelating experience for 192 frames... -[2024-07-05 11:39:42,602][43161] Signal inference workers to stop experience collection... -[2024-07-05 11:39:42,610][43181] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 11:39:42,700][43187] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,721][43208] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,733][43186] Decorrelating experience for 192 frames... -[2024-07-05 11:39:42,833][43206] Decorrelating experience for 224 frames... -[2024-07-05 11:39:42,981][43186] Decorrelating experience for 224 frames... -[2024-07-05 11:39:44,210][43161] Signal inference workers to resume experience collection... -[2024-07-05 11:39:44,210][43181] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 11:39:44,374][25826] Fps is (10 sec: 1638.3, 60 sec: 1638.3, 300 sec: 1638.3). Total num frames: 70017024. Throughput: 0: 1122.4. Samples: 5612. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:39:44,376][25826] Avg episode reward: [(0, '1.200')] -[2024-07-05 11:39:46,113][43181] Updated weights for policy 0, policy_version 10998 (0.0111) -[2024-07-05 11:39:48,032][43181] Updated weights for policy 0, policy_version 11008 (0.0011) -[2024-07-05 11:39:49,375][25826] Fps is (10 sec: 22117.0, 60 sec: 22117.0, 300 sec: 22117.0). Total num frames: 70230016. Throughput: 0: 3320.2. Samples: 33204. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:39:49,376][25826] Avg episode reward: [(0, '24.903')] -[2024-07-05 11:39:49,869][43181] Updated weights for policy 0, policy_version 11018 (0.0008) -[2024-07-05 11:39:51,651][43181] Updated weights for policy 0, policy_version 11028 (0.0008) -[2024-07-05 11:39:52,494][25826] Heartbeat connected on Batcher_0 -[2024-07-05 11:39:52,497][25826] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 11:39:52,506][25826] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 11:39:52,507][25826] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 11:39:52,514][25826] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 11:39:52,515][25826] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 11:39:52,516][25826] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 11:39:52,520][25826] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 11:39:52,527][25826] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 11:39:52,528][25826] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 11:39:52,533][25826] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 11:39:52,534][25826] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 11:39:52,536][25826] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 11:39:52,540][25826] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 11:39:52,547][25826] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 11:39:52,548][25826] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 11:39:52,551][25826] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 11:39:52,554][25826] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 11:39:52,554][25826] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 11:39:53,534][43181] Updated weights for policy 0, policy_version 11038 (0.0008) -[2024-07-05 11:39:54,376][25826] Fps is (10 sec: 43417.6, 60 sec: 29490.8, 300 sec: 29490.8). Total num frames: 70451200. Throughput: 0: 6717.0. Samples: 100756. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:39:54,385][25826] Avg episode reward: [(0, '41.835')] -[2024-07-05 11:39:55,430][43181] Updated weights for policy 0, policy_version 11048 (0.0011) -[2024-07-05 11:39:57,220][43181] Updated weights for policy 0, policy_version 11058 (0.0016) -[2024-07-05 11:39:59,277][43181] Updated weights for policy 0, policy_version 11068 (0.0011) -[2024-07-05 11:39:59,374][25826] Fps is (10 sec: 43420.0, 60 sec: 32767.9, 300 sec: 32767.9). Total num frames: 70664192. Throughput: 0: 8222.4. Samples: 164448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:39:59,379][25826] Avg episode reward: [(0, '47.605')] -[2024-07-05 11:40:01,249][43181] Updated weights for policy 0, policy_version 11078 (0.0012) -[2024-07-05 11:40:03,242][43181] Updated weights for policy 0, policy_version 11088 (0.0011) -[2024-07-05 11:40:04,376][25826] Fps is (10 sec: 41779.8, 60 sec: 34406.3, 300 sec: 34406.3). Total num frames: 70868992. Throughput: 0: 7816.9. Samples: 195424. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:04,388][25826] Avg episode reward: [(0, '42.638')] -[2024-07-05 11:40:05,251][43181] Updated weights for policy 0, policy_version 11098 (0.0017) -[2024-07-05 11:40:07,236][43181] Updated weights for policy 0, policy_version 11108 (0.0009) -[2024-07-05 11:40:09,106][43181] Updated weights for policy 0, policy_version 11118 (0.0011) -[2024-07-05 11:40:09,374][25826] Fps is (10 sec: 41779.5, 60 sec: 35771.8, 300 sec: 35771.8). Total num frames: 71081984. Throughput: 0: 8597.2. Samples: 257916. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:09,375][25826] Avg episode reward: [(0, '45.324')] -[2024-07-05 11:40:11,012][43181] Updated weights for policy 0, policy_version 11128 (0.0008) -[2024-07-05 11:40:12,903][43181] Updated weights for policy 0, policy_version 11138 (0.0008) -[2024-07-05 11:40:14,374][25826] Fps is (10 sec: 42598.6, 60 sec: 36747.0, 300 sec: 36747.0). Total num frames: 71294976. Throughput: 0: 9235.9. Samples: 323256. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:14,375][25826] Avg episode reward: [(0, '46.042')] -[2024-07-05 11:40:14,771][43181] Updated weights for policy 0, policy_version 11148 (0.0008) -[2024-07-05 11:40:16,652][43181] Updated weights for policy 0, policy_version 11158 (0.0009) -[2024-07-05 11:40:18,511][43181] Updated weights for policy 0, policy_version 11168 (0.0008) -[2024-07-05 11:40:19,374][25826] Fps is (10 sec: 43417.3, 60 sec: 37683.1, 300 sec: 37683.1). Total num frames: 71516160. Throughput: 0: 8872.8. Samples: 354912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:40:19,375][25826] Avg episode reward: [(0, '42.693')] -[2024-07-05 11:40:20,317][43181] Updated weights for policy 0, policy_version 11178 (0.0009) -[2024-07-05 11:40:22,124][43181] Updated weights for policy 0, policy_version 11188 (0.0008) -[2024-07-05 11:40:23,943][43181] Updated weights for policy 0, policy_version 11198 (0.0008) -[2024-07-05 11:40:24,374][25826] Fps is (10 sec: 45055.9, 60 sec: 38593.4, 300 sec: 38593.4). Total num frames: 71745536. Throughput: 0: 9393.2. Samples: 422692. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:40:24,375][25826] Avg episode reward: [(0, '43.401')] -[2024-07-05 11:40:25,735][43181] Updated weights for policy 0, policy_version 11208 (0.0010) -[2024-07-05 11:40:27,574][43181] Updated weights for policy 0, policy_version 11218 (0.0008) -[2024-07-05 11:40:29,333][43181] Updated weights for policy 0, policy_version 11228 (0.0009) -[2024-07-05 11:40:29,374][25826] Fps is (10 sec: 45874.3, 60 sec: 39321.4, 300 sec: 39321.4). Total num frames: 71974912. Throughput: 0: 10780.1. Samples: 490716. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:29,375][25826] Avg episode reward: [(0, '44.326')] -[2024-07-05 11:40:31,160][43181] Updated weights for policy 0, policy_version 11238 (0.0010) -[2024-07-05 11:40:32,964][43181] Updated weights for policy 0, policy_version 11248 (0.0008) -[2024-07-05 11:40:34,374][25826] Fps is (10 sec: 45056.2, 60 sec: 39768.5, 300 sec: 39768.5). Total num frames: 72196096. Throughput: 0: 10921.6. Samples: 524668. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:34,375][25826] Avg episode reward: [(0, '42.330')] -[2024-07-05 11:40:34,773][43181] Updated weights for policy 0, policy_version 11258 (0.0011) -[2024-07-05 11:40:36,621][43181] Updated weights for policy 0, policy_version 11268 (0.0010) -[2024-07-05 11:40:38,433][43181] Updated weights for policy 0, policy_version 11278 (0.0011) -[2024-07-05 11:40:39,374][25826] Fps is (10 sec: 45056.1, 60 sec: 40277.2, 300 sec: 40277.2). Total num frames: 72425472. Throughput: 0: 10926.4. Samples: 592444. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:39,375][25826] Avg episode reward: [(0, '48.619')] -[2024-07-05 11:40:39,380][43161] Saving new best policy, reward=48.619! -[2024-07-05 11:40:40,246][43181] Updated weights for policy 0, policy_version 11288 (0.0013) -[2024-07-05 11:40:42,056][43181] Updated weights for policy 0, policy_version 11298 (0.0008) -[2024-07-05 11:40:43,831][43181] Updated weights for policy 0, policy_version 11308 (0.0009) -[2024-07-05 11:40:44,374][25826] Fps is (10 sec: 45055.5, 60 sec: 43827.3, 300 sec: 40581.9). Total num frames: 72646656. Throughput: 0: 11024.0. Samples: 660528. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:40:44,375][25826] Avg episode reward: [(0, '41.804')] -[2024-07-05 11:40:45,701][43181] Updated weights for policy 0, policy_version 11318 (0.0008) -[2024-07-05 11:40:47,476][43181] Updated weights for policy 0, policy_version 11328 (0.0015) -[2024-07-05 11:40:49,271][43181] Updated weights for policy 0, policy_version 11338 (0.0007) -[2024-07-05 11:40:49,374][25826] Fps is (10 sec: 45057.2, 60 sec: 44100.8, 300 sec: 40960.0). Total num frames: 72876032. Throughput: 0: 11090.5. Samples: 694496. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:40:49,374][25826] Avg episode reward: [(0, '44.345')] -[2024-07-05 11:40:51,094][43181] Updated weights for policy 0, policy_version 11348 (0.0014) -[2024-07-05 11:40:52,884][43181] Updated weights for policy 0, policy_version 11358 (0.0008) -[2024-07-05 11:40:54,374][25826] Fps is (10 sec: 45875.7, 60 sec: 44237.0, 300 sec: 41287.7). Total num frames: 73105408. Throughput: 0: 11212.4. Samples: 762476. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:40:54,375][25826] Avg episode reward: [(0, '43.575')] -[2024-07-05 11:40:54,702][43181] Updated weights for policy 0, policy_version 11368 (0.0010) -[2024-07-05 11:40:56,514][43181] Updated weights for policy 0, policy_version 11378 (0.0008) -[2024-07-05 11:40:58,339][43181] Updated weights for policy 0, policy_version 11388 (0.0008) -[2024-07-05 11:40:59,374][25826] Fps is (10 sec: 45055.4, 60 sec: 44373.3, 300 sec: 41472.0). Total num frames: 73326592. Throughput: 0: 11258.1. Samples: 829872. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:40:59,375][25826] Avg episode reward: [(0, '43.848')] -[2024-07-05 11:41:00,171][43181] Updated weights for policy 0, policy_version 11398 (0.0008) -[2024-07-05 11:41:02,054][43181] Updated weights for policy 0, policy_version 11408 (0.0009) -[2024-07-05 11:41:04,018][43181] Updated weights for policy 0, policy_version 11418 (0.0008) -[2024-07-05 11:41:04,374][25826] Fps is (10 sec: 43417.6, 60 sec: 44509.9, 300 sec: 41538.3). Total num frames: 73539584. Throughput: 0: 11276.2. Samples: 862340. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:41:04,375][25826] Avg episode reward: [(0, '45.347')] -[2024-07-05 11:41:05,918][43181] Updated weights for policy 0, policy_version 11428 (0.0008) -[2024-07-05 11:41:07,820][43181] Updated weights for policy 0, policy_version 11438 (0.0008) -[2024-07-05 11:41:09,374][25826] Fps is (10 sec: 42598.0, 60 sec: 44509.7, 300 sec: 41597.1). Total num frames: 73752576. Throughput: 0: 11214.6. Samples: 927352. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:41:09,375][25826] Avg episode reward: [(0, '41.666')] -[2024-07-05 11:41:09,739][43181] Updated weights for policy 0, policy_version 11448 (0.0008) -[2024-07-05 11:41:11,615][43181] Updated weights for policy 0, policy_version 11458 (0.0011) -[2024-07-05 11:41:13,465][43181] Updated weights for policy 0, policy_version 11468 (0.0008) -[2024-07-05 11:41:14,374][25826] Fps is (10 sec: 43417.1, 60 sec: 44646.3, 300 sec: 41736.1). Total num frames: 73973760. Throughput: 0: 11154.1. Samples: 992648. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:41:14,375][25826] Avg episode reward: [(0, '41.398')] -[2024-07-05 11:41:15,263][43181] Updated weights for policy 0, policy_version 11478 (0.0008) -[2024-07-05 11:41:17,106][43181] Updated weights for policy 0, policy_version 11488 (0.0008) -[2024-07-05 11:41:18,931][43181] Updated weights for policy 0, policy_version 11498 (0.0012) -[2024-07-05 11:41:19,374][25826] Fps is (10 sec: 45056.7, 60 sec: 44783.0, 300 sec: 41943.0). Total num frames: 74203136. Throughput: 0: 11152.3. Samples: 1026524. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:41:19,375][25826] Avg episode reward: [(0, '45.534')] -[2024-07-05 11:41:20,770][43181] Updated weights for policy 0, policy_version 11508 (0.0012) -[2024-07-05 11:41:22,576][43181] Updated weights for policy 0, policy_version 11518 (0.0008) -[2024-07-05 11:41:24,344][43181] Updated weights for policy 0, policy_version 11528 (0.0009) -[2024-07-05 11:41:24,374][25826] Fps is (10 sec: 45875.6, 60 sec: 44783.0, 300 sec: 42130.3). Total num frames: 74432512. Throughput: 0: 11147.3. Samples: 1094072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:41:24,375][25826] Avg episode reward: [(0, '43.059')] -[2024-07-05 11:41:26,152][43181] Updated weights for policy 0, policy_version 11538 (0.0010) -[2024-07-05 11:41:28,075][43181] Updated weights for policy 0, policy_version 11548 (0.0008) -[2024-07-05 11:41:29,374][25826] Fps is (10 sec: 45055.3, 60 sec: 44646.5, 300 sec: 42226.0). Total num frames: 74653696. Throughput: 0: 11120.2. Samples: 1160940. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:41:29,375][25826] Avg episode reward: [(0, '44.268')] -[2024-07-05 11:41:29,390][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000011555_74653696.pth... -[2024-07-05 11:41:29,475][43161] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010530_66256896.pth -[2024-07-05 11:41:29,906][43181] Updated weights for policy 0, policy_version 11558 (0.0009) -[2024-07-05 11:41:31,721][43181] Updated weights for policy 0, policy_version 11568 (0.0008) -[2024-07-05 11:41:33,584][43181] Updated weights for policy 0, policy_version 11578 (0.0008) -[2024-07-05 11:41:34,374][25826] Fps is (10 sec: 44236.7, 60 sec: 44646.4, 300 sec: 42313.5). Total num frames: 74874880. Throughput: 0: 11111.9. Samples: 1194532. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:41:34,375][25826] Avg episode reward: [(0, '44.123')] -[2024-07-05 11:41:35,376][43181] Updated weights for policy 0, policy_version 11588 (0.0008) -[2024-07-05 11:41:37,207][43181] Updated weights for policy 0, policy_version 11598 (0.0008) -[2024-07-05 11:41:39,043][43181] Updated weights for policy 0, policy_version 11608 (0.0012) -[2024-07-05 11:41:39,374][25826] Fps is (10 sec: 44237.2, 60 sec: 44510.0, 300 sec: 42393.6). Total num frames: 75096064. Throughput: 0: 11089.8. Samples: 1261516. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:41:39,375][25826] Avg episode reward: [(0, '43.357')] -[2024-07-05 11:41:40,873][43181] Updated weights for policy 0, policy_version 11618 (0.0011) -[2024-07-05 11:41:42,712][43181] Updated weights for policy 0, policy_version 11628 (0.0010) -[2024-07-05 11:41:44,374][25826] Fps is (10 sec: 45056.3, 60 sec: 44646.5, 300 sec: 42532.9). Total num frames: 75325440. Throughput: 0: 11092.5. Samples: 1329032. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:41:44,375][25826] Avg episode reward: [(0, '46.127')] -[2024-07-05 11:41:44,550][43181] Updated weights for policy 0, policy_version 11638 (0.0009) -[2024-07-05 11:41:46,351][43181] Updated weights for policy 0, policy_version 11648 (0.0008) -[2024-07-05 11:41:48,152][43181] Updated weights for policy 0, policy_version 11658 (0.0011) -[2024-07-05 11:41:49,374][25826] Fps is (10 sec: 45056.5, 60 sec: 44509.8, 300 sec: 42598.4). Total num frames: 75546624. Throughput: 0: 11106.0. Samples: 1362112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:41:49,375][25826] Avg episode reward: [(0, '45.923')] -[2024-07-05 11:41:50,000][43181] Updated weights for policy 0, policy_version 11668 (0.0012) -[2024-07-05 11:41:51,846][43181] Updated weights for policy 0, policy_version 11678 (0.0011) -[2024-07-05 11:41:53,800][43181] Updated weights for policy 0, policy_version 11688 (0.0009) -[2024-07-05 11:41:54,374][25826] Fps is (10 sec: 43417.6, 60 sec: 44236.8, 300 sec: 42598.4). Total num frames: 75759616. Throughput: 0: 11138.5. Samples: 1428580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:41:54,375][25826] Avg episode reward: [(0, '44.827')] -[2024-07-05 11:41:55,734][43181] Updated weights for policy 0, policy_version 11698 (0.0008) -[2024-07-05 11:41:57,592][43181] Updated weights for policy 0, policy_version 11708 (0.0014) -[2024-07-05 11:41:59,374][25826] Fps is (10 sec: 43417.5, 60 sec: 44236.9, 300 sec: 42656.9). Total num frames: 75980800. Throughput: 0: 11153.6. Samples: 1494560. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 11:41:59,375][25826] Avg episode reward: [(0, '43.967')] -[2024-07-05 11:41:59,424][43181] Updated weights for policy 0, policy_version 11718 (0.0008) -[2024-07-05 11:42:01,244][43181] Updated weights for policy 0, policy_version 11728 (0.0008) -[2024-07-05 11:42:03,052][43181] Updated weights for policy 0, policy_version 11738 (0.0008) -[2024-07-05 11:42:04,374][25826] Fps is (10 sec: 45055.6, 60 sec: 44509.8, 300 sec: 42767.9). Total num frames: 76210176. Throughput: 0: 11145.1. Samples: 1528052. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:42:04,376][25826] Avg episode reward: [(0, '46.027')] -[2024-07-05 11:42:04,819][43181] Updated weights for policy 0, policy_version 11748 (0.0012) -[2024-07-05 11:42:06,641][43181] Updated weights for policy 0, policy_version 11758 (0.0011) -[2024-07-05 11:42:08,495][43181] Updated weights for policy 0, policy_version 11768 (0.0008) -[2024-07-05 11:42:09,374][25826] Fps is (10 sec: 45056.0, 60 sec: 44646.5, 300 sec: 42816.9). Total num frames: 76431360. Throughput: 0: 11144.4. Samples: 1595572. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:42:09,375][25826] Avg episode reward: [(0, '44.715')] -[2024-07-05 11:42:10,323][43181] Updated weights for policy 0, policy_version 11778 (0.0009) -[2024-07-05 11:42:12,183][43181] Updated weights for policy 0, policy_version 11788 (0.0010) -[2024-07-05 11:42:13,971][43181] Updated weights for policy 0, policy_version 11798 (0.0009) -[2024-07-05 11:42:14,374][25826] Fps is (10 sec: 45056.4, 60 sec: 44783.0, 300 sec: 42915.5). Total num frames: 76660736. Throughput: 0: 11151.5. Samples: 1662756. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:14,375][25826] Avg episode reward: [(0, '48.145')] -[2024-07-05 11:42:15,793][43181] Updated weights for policy 0, policy_version 11808 (0.0009) -[2024-07-05 11:42:17,594][43181] Updated weights for policy 0, policy_version 11818 (0.0011) -[2024-07-05 11:42:19,374][25826] Fps is (10 sec: 45056.0, 60 sec: 44646.4, 300 sec: 42956.8). Total num frames: 76881920. Throughput: 0: 11161.8. Samples: 1696812. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:19,375][25826] Avg episode reward: [(0, '45.630')] -[2024-07-05 11:42:19,426][43181] Updated weights for policy 0, policy_version 11828 (0.0011) -[2024-07-05 11:42:21,202][43181] Updated weights for policy 0, policy_version 11838 (0.0008) -[2024-07-05 11:42:23,038][43181] Updated weights for policy 0, policy_version 11848 (0.0010) -[2024-07-05 11:42:24,374][25826] Fps is (10 sec: 45055.2, 60 sec: 44646.3, 300 sec: 43045.2). Total num frames: 77111296. Throughput: 0: 11177.9. Samples: 1764520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:24,375][25826] Avg episode reward: [(0, '47.544')] -[2024-07-05 11:42:24,887][43181] Updated weights for policy 0, policy_version 11858 (0.0011) -[2024-07-05 11:42:26,684][43181] Updated weights for policy 0, policy_version 11868 (0.0009) -[2024-07-05 11:42:28,485][43181] Updated weights for policy 0, policy_version 11878 (0.0008) -[2024-07-05 11:42:29,374][25826] Fps is (10 sec: 45053.9, 60 sec: 44646.2, 300 sec: 43080.2). Total num frames: 77332480. Throughput: 0: 11170.7. Samples: 1831720. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:29,375][25826] Avg episode reward: [(0, '44.538')] -[2024-07-05 11:42:30,287][43181] Updated weights for policy 0, policy_version 11888 (0.0008) -[2024-07-05 11:42:32,104][43181] Updated weights for policy 0, policy_version 11898 (0.0009) -[2024-07-05 11:42:33,915][43181] Updated weights for policy 0, policy_version 11908 (0.0008) -[2024-07-05 11:42:34,374][25826] Fps is (10 sec: 45055.9, 60 sec: 44782.8, 300 sec: 43160.1). Total num frames: 77561856. Throughput: 0: 11191.2. Samples: 1865716. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:34,376][25826] Avg episode reward: [(0, '46.505')] -[2024-07-05 11:42:35,751][43181] Updated weights for policy 0, policy_version 11918 (0.0008) -[2024-07-05 11:42:37,605][43181] Updated weights for policy 0, policy_version 11928 (0.0008) -[2024-07-05 11:42:39,374][25826] Fps is (10 sec: 45058.2, 60 sec: 44783.0, 300 sec: 43190.0). Total num frames: 77783040. Throughput: 0: 11213.9. Samples: 1933208. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:39,375][25826] Avg episode reward: [(0, '45.647')] -[2024-07-05 11:42:39,419][43181] Updated weights for policy 0, policy_version 11938 (0.0008) -[2024-07-05 11:42:41,232][43181] Updated weights for policy 0, policy_version 11948 (0.0008) -[2024-07-05 11:42:43,081][43181] Updated weights for policy 0, policy_version 11958 (0.0009) -[2024-07-05 11:42:44,374][25826] Fps is (10 sec: 45056.6, 60 sec: 44782.9, 300 sec: 43262.6). Total num frames: 78012416. Throughput: 0: 11256.4. Samples: 2001100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:42:44,375][25826] Avg episode reward: [(0, '43.649')] -[2024-07-05 11:42:44,877][43181] Updated weights for policy 0, policy_version 11968 (0.0009) -[2024-07-05 11:42:46,711][43181] Updated weights for policy 0, policy_version 11978 (0.0009) -[2024-07-05 11:42:48,523][43181] Updated weights for policy 0, policy_version 11988 (0.0009) -[2024-07-05 11:42:49,374][25826] Fps is (10 sec: 45055.6, 60 sec: 44782.8, 300 sec: 43288.2). Total num frames: 78233600. Throughput: 0: 11252.7. Samples: 2034424. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:42:49,375][25826] Avg episode reward: [(0, '42.540')] -[2024-07-05 11:42:50,332][43181] Updated weights for policy 0, policy_version 11998 (0.0008) -[2024-07-05 11:42:52,168][43181] Updated weights for policy 0, policy_version 12008 (0.0011) -[2024-07-05 11:42:54,048][43181] Updated weights for policy 0, policy_version 12018 (0.0010) -[2024-07-05 11:42:54,374][25826] Fps is (10 sec: 44236.7, 60 sec: 44919.4, 300 sec: 43312.6). Total num frames: 78454784. Throughput: 0: 11249.3. Samples: 2101792. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:42:54,375][25826] Avg episode reward: [(0, '41.810')] -[2024-07-05 11:42:55,864][43181] Updated weights for policy 0, policy_version 12028 (0.0008) -[2024-07-05 11:42:57,614][43181] Updated weights for policy 0, policy_version 12038 (0.0011) -[2024-07-05 11:42:59,374][25826] Fps is (10 sec: 45056.5, 60 sec: 45056.0, 300 sec: 43376.6). Total num frames: 78684160. Throughput: 0: 11250.0. Samples: 2169008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:42:59,375][25826] Avg episode reward: [(0, '44.223')] -[2024-07-05 11:42:59,457][43181] Updated weights for policy 0, policy_version 12048 (0.0008) -[2024-07-05 11:43:01,289][43181] Updated weights for policy 0, policy_version 12058 (0.0008) -[2024-07-05 11:43:03,083][43181] Updated weights for policy 0, policy_version 12068 (0.0008) -[2024-07-05 11:43:04,374][25826] Fps is (10 sec: 45875.7, 60 sec: 45056.1, 300 sec: 43437.6). Total num frames: 78913536. Throughput: 0: 11246.0. Samples: 2202880. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:04,375][25826] Avg episode reward: [(0, '47.091')] -[2024-07-05 11:43:04,882][43181] Updated weights for policy 0, policy_version 12078 (0.0009) -[2024-07-05 11:43:06,689][43181] Updated weights for policy 0, policy_version 12088 (0.0008) -[2024-07-05 11:43:08,557][43181] Updated weights for policy 0, policy_version 12098 (0.0013) -[2024-07-05 11:43:09,374][25826] Fps is (10 sec: 45055.5, 60 sec: 45055.9, 300 sec: 43456.6). Total num frames: 79134720. Throughput: 0: 11240.5. Samples: 2270344. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 11:43:09,375][25826] Avg episode reward: [(0, '48.291')] -[2024-07-05 11:43:10,407][43181] Updated weights for policy 0, policy_version 12108 (0.0011) -[2024-07-05 11:43:12,237][43181] Updated weights for policy 0, policy_version 12118 (0.0008) -[2024-07-05 11:43:14,053][43181] Updated weights for policy 0, policy_version 12128 (0.0009) -[2024-07-05 11:43:14,374][25826] Fps is (10 sec: 44235.9, 60 sec: 44919.3, 300 sec: 43474.7). Total num frames: 79355904. Throughput: 0: 11241.3. Samples: 2337576. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 11:43:14,375][25826] Avg episode reward: [(0, '47.436')] -[2024-07-05 11:43:15,893][43181] Updated weights for policy 0, policy_version 12138 (0.0008) -[2024-07-05 11:43:17,700][43181] Updated weights for policy 0, policy_version 12148 (0.0008) -[2024-07-05 11:43:19,374][25826] Fps is (10 sec: 45056.3, 60 sec: 45056.0, 300 sec: 43529.3). Total num frames: 79585280. Throughput: 0: 11243.8. Samples: 2371688. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:19,375][25826] Avg episode reward: [(0, '44.407')] -[2024-07-05 11:43:19,497][43181] Updated weights for policy 0, policy_version 12158 (0.0008) -[2024-07-05 11:43:21,325][43181] Updated weights for policy 0, policy_version 12168 (0.0008) -[2024-07-05 11:43:23,117][43181] Updated weights for policy 0, policy_version 12178 (0.0009) -[2024-07-05 11:43:24,374][25826] Fps is (10 sec: 45056.4, 60 sec: 44919.5, 300 sec: 43545.0). Total num frames: 79806464. Throughput: 0: 11242.4. Samples: 2439116. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:24,375][25826] Avg episode reward: [(0, '48.964')] -[2024-07-05 11:43:24,376][43161] Saving new best policy, reward=48.964! -[2024-07-05 11:43:24,943][43181] Updated weights for policy 0, policy_version 12188 (0.0008) -[2024-07-05 11:43:26,732][43181] Updated weights for policy 0, policy_version 12198 (0.0007) -[2024-07-05 11:43:28,578][43181] Updated weights for policy 0, policy_version 12208 (0.0010) -[2024-07-05 11:43:29,374][25826] Fps is (10 sec: 45056.2, 60 sec: 45056.4, 300 sec: 43595.7). Total num frames: 80035840. Throughput: 0: 11242.1. Samples: 2506996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:29,375][25826] Avg episode reward: [(0, '46.462')] -[2024-07-05 11:43:29,379][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000012212_80035840.pth... -[2024-07-05 11:43:29,466][43161] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000010988_70008832.pth -[2024-07-05 11:43:30,403][43181] Updated weights for policy 0, policy_version 12218 (0.0008) -[2024-07-05 11:43:32,232][43181] Updated weights for policy 0, policy_version 12228 (0.0009) -[2024-07-05 11:43:34,076][43181] Updated weights for policy 0, policy_version 12238 (0.0009) -[2024-07-05 11:43:34,374][25826] Fps is (10 sec: 45056.5, 60 sec: 44919.6, 300 sec: 43609.3). Total num frames: 80257024. Throughput: 0: 11249.5. Samples: 2540648. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:34,374][25826] Avg episode reward: [(0, '45.668')] -[2024-07-05 11:43:35,892][43181] Updated weights for policy 0, policy_version 12248 (0.0015) -[2024-07-05 11:43:37,731][43181] Updated weights for policy 0, policy_version 12258 (0.0012) -[2024-07-05 11:43:39,374][25826] Fps is (10 sec: 45055.5, 60 sec: 45055.9, 300 sec: 43656.5). Total num frames: 80486400. Throughput: 0: 11249.8. Samples: 2608032. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:39,375][25826] Avg episode reward: [(0, '45.834')] -[2024-07-05 11:43:39,539][43181] Updated weights for policy 0, policy_version 12268 (0.0011) -[2024-07-05 11:43:41,313][43181] Updated weights for policy 0, policy_version 12278 (0.0008) -[2024-07-05 11:43:43,122][43181] Updated weights for policy 0, policy_version 12288 (0.0010) -[2024-07-05 11:43:44,374][25826] Fps is (10 sec: 45056.1, 60 sec: 44919.5, 300 sec: 43668.4). Total num frames: 80707584. Throughput: 0: 11253.4. Samples: 2675412. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:44,375][25826] Avg episode reward: [(0, '40.752')] -[2024-07-05 11:43:44,952][43181] Updated weights for policy 0, policy_version 12298 (0.0008) -[2024-07-05 11:43:46,825][43181] Updated weights for policy 0, policy_version 12308 (0.0010) -[2024-07-05 11:43:48,616][43181] Updated weights for policy 0, policy_version 12318 (0.0008) -[2024-07-05 11:43:49,374][25826] Fps is (10 sec: 44237.2, 60 sec: 44919.5, 300 sec: 43679.7). Total num frames: 80928768. Throughput: 0: 11252.9. Samples: 2709260. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:43:49,375][25826] Avg episode reward: [(0, '46.035')] -[2024-07-05 11:43:50,498][43181] Updated weights for policy 0, policy_version 12328 (0.0015) -[2024-07-05 11:43:52,282][43181] Updated weights for policy 0, policy_version 12338 (0.0008) -[2024-07-05 11:43:54,092][43181] Updated weights for policy 0, policy_version 12348 (0.0008) -[2024-07-05 11:43:54,374][25826] Fps is (10 sec: 45055.5, 60 sec: 45056.0, 300 sec: 43722.8). Total num frames: 81158144. Throughput: 0: 11246.3. Samples: 2776428. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:43:54,375][25826] Avg episode reward: [(0, '44.583')] -[2024-07-05 11:43:55,913][43181] Updated weights for policy 0, policy_version 12358 (0.0008) -[2024-07-05 11:43:57,723][43181] Updated weights for policy 0, policy_version 12368 (0.0009) -[2024-07-05 11:43:59,374][25826] Fps is (10 sec: 45056.0, 60 sec: 44919.4, 300 sec: 43732.7). Total num frames: 81379328. Throughput: 0: 11248.3. Samples: 2843748. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:43:59,375][25826] Avg episode reward: [(0, '46.905')] -[2024-07-05 11:43:59,591][43181] Updated weights for policy 0, policy_version 12378 (0.0009) -[2024-07-05 11:44:01,481][43181] Updated weights for policy 0, policy_version 12388 (0.0009) -[2024-07-05 11:44:03,387][43181] Updated weights for policy 0, policy_version 12398 (0.0011) -[2024-07-05 11:44:04,382][25826] Fps is (10 sec: 44236.8, 60 sec: 44782.9, 300 sec: 43742.2). Total num frames: 81600512. Throughput: 0: 11221.0. Samples: 2876632. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:44:04,384][25826] Avg episode reward: [(0, '42.087')] -[2024-07-05 11:44:05,074][43181] Updated weights for policy 0, policy_version 12408 (0.0007) -[2024-07-05 11:44:06,792][43181] Updated weights for policy 0, policy_version 12418 (0.0009) -[2024-07-05 11:44:08,540][43181] Updated weights for policy 0, policy_version 12428 (0.0009) -[2024-07-05 11:44:09,374][25826] Fps is (10 sec: 45875.4, 60 sec: 45056.1, 300 sec: 43812.0). Total num frames: 81838080. Throughput: 0: 11283.2. Samples: 2946860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:44:09,375][25826] Avg episode reward: [(0, '45.306')] -[2024-07-05 11:44:10,235][43181] Updated weights for policy 0, policy_version 12438 (0.0007) -[2024-07-05 11:44:12,145][43181] Updated weights for policy 0, policy_version 12448 (0.0009) -[2024-07-05 11:44:14,353][43181] Updated weights for policy 0, policy_version 12458 (0.0016) -[2024-07-05 11:44:14,374][25826] Fps is (10 sec: 45055.9, 60 sec: 44919.5, 300 sec: 43790.0). Total num frames: 82051072. Throughput: 0: 11220.3. Samples: 3011908. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:44:14,378][25826] Avg episode reward: [(0, '45.136')] -[2024-07-05 11:44:16,371][43181] Updated weights for policy 0, policy_version 12468 (0.0011) -[2024-07-05 11:44:18,234][43181] Updated weights for policy 0, policy_version 12478 (0.0011) -[2024-07-05 11:44:19,375][25826] Fps is (10 sec: 41778.9, 60 sec: 44509.9, 300 sec: 43739.4). Total num frames: 82255872. Throughput: 0: 11133.5. Samples: 3041656. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:44:19,377][25826] Avg episode reward: [(0, '47.355')] -[2024-07-05 11:44:20,140][43181] Updated weights for policy 0, policy_version 12488 (0.0009) -[2024-07-05 11:44:22,070][43181] Updated weights for policy 0, policy_version 12498 (0.0008) -[2024-07-05 11:44:24,007][43181] Updated weights for policy 0, policy_version 12508 (0.0011) -[2024-07-05 11:44:24,375][25826] Fps is (10 sec: 42597.3, 60 sec: 44509.7, 300 sec: 43748.1). Total num frames: 82477056. Throughput: 0: 11060.5. Samples: 3105756. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:44:24,376][25826] Avg episode reward: [(0, '42.620')] -[2024-07-05 11:44:25,874][43181] Updated weights for policy 0, policy_version 12518 (0.0011) -[2024-07-05 11:44:27,683][43181] Updated weights for policy 0, policy_version 12528 (0.0012) -[2024-07-05 11:44:29,374][25826] Fps is (10 sec: 43417.7, 60 sec: 44236.8, 300 sec: 43728.3). Total num frames: 82690048. Throughput: 0: 11016.6. Samples: 3171160. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:44:29,377][25826] Avg episode reward: [(0, '45.691')] -[2024-07-05 11:44:29,702][43181] Updated weights for policy 0, policy_version 12538 (0.0008) -[2024-07-05 11:44:31,689][43181] Updated weights for policy 0, policy_version 12548 (0.0008) -[2024-07-05 11:44:33,602][43181] Updated weights for policy 0, policy_version 12558 (0.0009) -[2024-07-05 11:44:34,374][25826] Fps is (10 sec: 42599.7, 60 sec: 44100.2, 300 sec: 43709.2). Total num frames: 82903040. Throughput: 0: 10940.9. Samples: 3201600. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:44:34,375][25826] Avg episode reward: [(0, '44.058')] -[2024-07-05 11:44:35,468][43181] Updated weights for policy 0, policy_version 12568 (0.0011) -[2024-07-05 11:44:37,318][43181] Updated weights for policy 0, policy_version 12578 (0.0011) -[2024-07-05 11:44:39,159][43181] Updated weights for policy 0, policy_version 12588 (0.0008) -[2024-07-05 11:44:39,374][25826] Fps is (10 sec: 43417.7, 60 sec: 43963.8, 300 sec: 44431.2). Total num frames: 83124224. Throughput: 0: 10925.5. Samples: 3268076. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:44:39,383][25826] Avg episode reward: [(0, '44.179')] -[2024-07-05 11:44:40,964][43181] Updated weights for policy 0, policy_version 12598 (0.0007) -[2024-07-05 11:44:42,799][43181] Updated weights for policy 0, policy_version 12608 (0.0011) -[2024-07-05 11:44:44,374][25826] Fps is (10 sec: 44236.7, 60 sec: 43963.7, 300 sec: 44459.0). Total num frames: 83345408. Throughput: 0: 10910.1. Samples: 3334704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:44:44,375][25826] Avg episode reward: [(0, '47.531')] -[2024-07-05 11:44:44,648][43181] Updated weights for policy 0, policy_version 12618 (0.0008) -[2024-07-05 11:44:46,486][43181] Updated weights for policy 0, policy_version 12628 (0.0008) -[2024-07-05 11:44:48,292][43181] Updated weights for policy 0, policy_version 12638 (0.0008) -[2024-07-05 11:44:49,374][25826] Fps is (10 sec: 44236.6, 60 sec: 43963.7, 300 sec: 44459.0). Total num frames: 83566592. Throughput: 0: 10928.3. Samples: 3368404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:44:49,375][25826] Avg episode reward: [(0, '42.528')] -[2024-07-05 11:44:50,109][43181] Updated weights for policy 0, policy_version 12648 (0.0008) -[2024-07-05 11:44:51,926][43181] Updated weights for policy 0, policy_version 12658 (0.0009) -[2024-07-05 11:44:53,744][43181] Updated weights for policy 0, policy_version 12668 (0.0008) -[2024-07-05 11:44:54,374][25826] Fps is (10 sec: 45056.0, 60 sec: 43963.7, 300 sec: 44514.5). Total num frames: 83795968. Throughput: 0: 10868.7. Samples: 3435952. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:44:54,375][25826] Avg episode reward: [(0, '42.789')] -[2024-07-05 11:44:55,609][43181] Updated weights for policy 0, policy_version 12678 (0.0010) -[2024-07-05 11:44:57,396][43181] Updated weights for policy 0, policy_version 12688 (0.0008) -[2024-07-05 11:44:59,234][43181] Updated weights for policy 0, policy_version 12698 (0.0009) -[2024-07-05 11:44:59,374][25826] Fps is (10 sec: 45056.2, 60 sec: 43963.7, 300 sec: 44570.0). Total num frames: 84017152. Throughput: 0: 10909.2. Samples: 3502820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:44:59,375][25826] Avg episode reward: [(0, '47.964')] -[2024-07-05 11:45:01,077][43181] Updated weights for policy 0, policy_version 12708 (0.0013) -[2024-07-05 11:45:02,888][43181] Updated weights for policy 0, policy_version 12718 (0.0009) -[2024-07-05 11:45:04,374][25826] Fps is (10 sec: 44237.3, 60 sec: 43963.8, 300 sec: 44597.8). Total num frames: 84238336. Throughput: 0: 10998.7. Samples: 3536596. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:45:04,375][25826] Avg episode reward: [(0, '48.425')] -[2024-07-05 11:45:04,780][43181] Updated weights for policy 0, policy_version 12728 (0.0012) -[2024-07-05 11:45:06,624][43181] Updated weights for policy 0, policy_version 12738 (0.0015) -[2024-07-05 11:45:08,455][43181] Updated weights for policy 0, policy_version 12748 (0.0008) -[2024-07-05 11:45:09,376][25826] Fps is (10 sec: 45046.9, 60 sec: 43825.7, 300 sec: 44653.0). Total num frames: 84467712. Throughput: 0: 11051.8. Samples: 3603108. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:45:09,377][25826] Avg episode reward: [(0, '47.630')] -[2024-07-05 11:45:10,286][43181] Updated weights for policy 0, policy_version 12758 (0.0008) -[2024-07-05 11:45:12,137][43181] Updated weights for policy 0, policy_version 12768 (0.0012) -[2024-07-05 11:45:13,996][43181] Updated weights for policy 0, policy_version 12778 (0.0013) -[2024-07-05 11:45:14,374][25826] Fps is (10 sec: 44237.0, 60 sec: 43827.3, 300 sec: 44625.6). Total num frames: 84680704. Throughput: 0: 11071.3. Samples: 3669368. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:45:14,375][25826] Avg episode reward: [(0, '46.722')] -[2024-07-05 11:45:15,788][43181] Updated weights for policy 0, policy_version 12788 (0.0010) -[2024-07-05 11:45:17,610][43181] Updated weights for policy 0, policy_version 12798 (0.0009) -[2024-07-05 11:45:19,374][25826] Fps is (10 sec: 43425.6, 60 sec: 44100.2, 300 sec: 44597.8). Total num frames: 84901888. Throughput: 0: 11138.8. Samples: 3702848. Policy #0 lag: (min: 0.0, avg: 0.9, max: 4.0) -[2024-07-05 11:45:19,375][25826] Avg episode reward: [(0, '45.577')] -[2024-07-05 11:45:19,471][43181] Updated weights for policy 0, policy_version 12808 (0.0009) -[2024-07-05 11:45:21,299][43181] Updated weights for policy 0, policy_version 12818 (0.0011) -[2024-07-05 11:45:23,188][43181] Updated weights for policy 0, policy_version 12828 (0.0008) -[2024-07-05 11:45:24,374][25826] Fps is (10 sec: 45055.3, 60 sec: 44237.0, 300 sec: 44597.8). Total num frames: 85131264. Throughput: 0: 11150.4. Samples: 3769844. Policy #0 lag: (min: 0.0, avg: 0.9, max: 4.0) -[2024-07-05 11:45:24,375][25826] Avg episode reward: [(0, '46.337')] -[2024-07-05 11:45:25,010][43181] Updated weights for policy 0, policy_version 12838 (0.0008) -[2024-07-05 11:45:26,836][43181] Updated weights for policy 0, policy_version 12848 (0.0011) -[2024-07-05 11:45:28,663][43181] Updated weights for policy 0, policy_version 12858 (0.0008) -[2024-07-05 11:45:29,374][25826] Fps is (10 sec: 45056.7, 60 sec: 44373.3, 300 sec: 44597.8). Total num frames: 85352448. Throughput: 0: 11161.0. Samples: 3836948. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:45:29,375][25826] Avg episode reward: [(0, '42.860')] -[2024-07-05 11:45:29,403][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000012862_85360640.pth... -[2024-07-05 11:45:29,494][43161] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000011555_74653696.pth -[2024-07-05 11:45:30,488][43181] Updated weights for policy 0, policy_version 12868 (0.0008) -[2024-07-05 11:45:32,313][43181] Updated weights for policy 0, policy_version 12878 (0.0008) -[2024-07-05 11:45:34,170][43181] Updated weights for policy 0, policy_version 12888 (0.0008) -[2024-07-05 11:45:34,374][25826] Fps is (10 sec: 45055.5, 60 sec: 44646.3, 300 sec: 44597.8). Total num frames: 85581824. Throughput: 0: 11161.6. Samples: 3870676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:45:34,375][25826] Avg episode reward: [(0, '45.548')] -[2024-07-05 11:45:35,935][43181] Updated weights for policy 0, policy_version 12898 (0.0010) -[2024-07-05 11:45:37,830][43181] Updated weights for policy 0, policy_version 12908 (0.0009) -[2024-07-05 11:45:39,374][25826] Fps is (10 sec: 45054.4, 60 sec: 44646.1, 300 sec: 44597.8). Total num frames: 85803008. Throughput: 0: 11148.6. Samples: 3937644. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:45:39,376][25826] Avg episode reward: [(0, '42.392')] -[2024-07-05 11:45:39,651][43181] Updated weights for policy 0, policy_version 12918 (0.0009) -[2024-07-05 11:45:41,527][43181] Updated weights for policy 0, policy_version 12928 (0.0008) -[2024-07-05 11:45:43,367][43181] Updated weights for policy 0, policy_version 12938 (0.0016) -[2024-07-05 11:45:44,374][25826] Fps is (10 sec: 44237.4, 60 sec: 44646.4, 300 sec: 44570.0). Total num frames: 86024192. Throughput: 0: 11154.9. Samples: 4004788. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:45:44,375][25826] Avg episode reward: [(0, '43.485')] -[2024-07-05 11:45:45,175][43181] Updated weights for policy 0, policy_version 12948 (0.0009) -[2024-07-05 11:45:46,978][43181] Updated weights for policy 0, policy_version 12958 (0.0008) -[2024-07-05 11:45:48,795][43181] Updated weights for policy 0, policy_version 12968 (0.0008) -[2024-07-05 11:45:49,374][25826] Fps is (10 sec: 45057.4, 60 sec: 44782.9, 300 sec: 44570.0). Total num frames: 86253568. Throughput: 0: 11146.5. Samples: 4038192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:45:49,375][25826] Avg episode reward: [(0, '44.761')] -[2024-07-05 11:45:50,618][43181] Updated weights for policy 0, policy_version 12978 (0.0008) -[2024-07-05 11:45:52,489][43181] Updated weights for policy 0, policy_version 12988 (0.0010) -[2024-07-05 11:45:54,301][43181] Updated weights for policy 0, policy_version 12998 (0.0008) -[2024-07-05 11:45:54,374][25826] Fps is (10 sec: 45054.8, 60 sec: 44646.3, 300 sec: 44570.0). Total num frames: 86474752. Throughput: 0: 11158.3. Samples: 4105212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:45:54,375][25826] Avg episode reward: [(0, '45.899')] -[2024-07-05 11:45:56,144][43181] Updated weights for policy 0, policy_version 13008 (0.0008) -[2024-07-05 11:45:57,982][43181] Updated weights for policy 0, policy_version 13018 (0.0008) -[2024-07-05 11:45:59,374][25826] Fps is (10 sec: 44235.9, 60 sec: 44646.2, 300 sec: 44597.8). Total num frames: 86695936. Throughput: 0: 11178.0. Samples: 4172384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:45:59,375][25826] Avg episode reward: [(0, '46.554')] -[2024-07-05 11:45:59,805][43181] Updated weights for policy 0, policy_version 13028 (0.0009) -[2024-07-05 11:46:01,621][43181] Updated weights for policy 0, policy_version 13038 (0.0007) -[2024-07-05 11:46:03,424][43181] Updated weights for policy 0, policy_version 13048 (0.0008) -[2024-07-05 11:46:04,374][25826] Fps is (10 sec: 44237.9, 60 sec: 44646.3, 300 sec: 44625.6). Total num frames: 86917120. Throughput: 0: 11181.5. Samples: 4206012. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:46:04,375][25826] Avg episode reward: [(0, '45.011')] -[2024-07-05 11:46:05,267][43181] Updated weights for policy 0, policy_version 13058 (0.0011) -[2024-07-05 11:46:07,157][43181] Updated weights for policy 0, policy_version 13068 (0.0013) -[2024-07-05 11:46:08,972][43181] Updated weights for policy 0, policy_version 13078 (0.0009) -[2024-07-05 11:46:09,374][25826] Fps is (10 sec: 45057.2, 60 sec: 44647.9, 300 sec: 44653.4). Total num frames: 87146496. Throughput: 0: 11182.3. Samples: 4273048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:46:09,375][25826] Avg episode reward: [(0, '45.348')] -[2024-07-05 11:46:10,814][43181] Updated weights for policy 0, policy_version 13088 (0.0010) -[2024-07-05 11:46:12,634][43181] Updated weights for policy 0, policy_version 13098 (0.0010) -[2024-07-05 11:46:14,374][25826] Fps is (10 sec: 45056.2, 60 sec: 44782.9, 300 sec: 44625.6). Total num frames: 87367680. Throughput: 0: 11187.2. Samples: 4340372. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:46:14,375][25826] Avg episode reward: [(0, '44.673')] -[2024-07-05 11:46:14,402][43181] Updated weights for policy 0, policy_version 13108 (0.0011) -[2024-07-05 11:46:16,204][43181] Updated weights for policy 0, policy_version 13118 (0.0009) -[2024-07-05 11:46:18,071][43181] Updated weights for policy 0, policy_version 13128 (0.0010) -[2024-07-05 11:46:19,374][25826] Fps is (10 sec: 45055.9, 60 sec: 44919.6, 300 sec: 44625.6). Total num frames: 87597056. Throughput: 0: 11189.1. Samples: 4374184. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:46:19,375][25826] Avg episode reward: [(0, '44.764')] -[2024-07-05 11:46:19,919][43181] Updated weights for policy 0, policy_version 13138 (0.0010) -[2024-07-05 11:46:21,699][43181] Updated weights for policy 0, policy_version 13148 (0.0009) -[2024-07-05 11:46:23,561][43181] Updated weights for policy 0, policy_version 13158 (0.0009) -[2024-07-05 11:46:24,374][25826] Fps is (10 sec: 45055.8, 60 sec: 44783.0, 300 sec: 44625.6). Total num frames: 87818240. Throughput: 0: 11199.3. Samples: 4441608. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:46:24,375][25826] Avg episode reward: [(0, '44.403')] -[2024-07-05 11:46:25,412][43181] Updated weights for policy 0, policy_version 13168 (0.0009) -[2024-07-05 11:46:27,206][43181] Updated weights for policy 0, policy_version 13178 (0.0011) -[2024-07-05 11:46:29,060][43181] Updated weights for policy 0, policy_version 13188 (0.0008) -[2024-07-05 11:46:29,374][25826] Fps is (10 sec: 44236.4, 60 sec: 44782.9, 300 sec: 44625.6). Total num frames: 88039424. Throughput: 0: 11193.6. Samples: 4508500. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:46:29,375][25826] Avg episode reward: [(0, '46.300')] -[2024-07-05 11:46:30,868][43181] Updated weights for policy 0, policy_version 13198 (0.0008) -[2024-07-05 11:46:32,676][43181] Updated weights for policy 0, policy_version 13208 (0.0008) -[2024-07-05 11:46:34,374][25826] Fps is (10 sec: 45055.9, 60 sec: 44783.0, 300 sec: 44653.4). Total num frames: 88268800. Throughput: 0: 11206.2. Samples: 4542472. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:46:34,375][25826] Avg episode reward: [(0, '42.772')] -[2024-07-05 11:46:34,540][43181] Updated weights for policy 0, policy_version 13218 (0.0011) -[2024-07-05 11:46:36,385][43181] Updated weights for policy 0, policy_version 13228 (0.0011) -[2024-07-05 11:46:38,171][43181] Updated weights for policy 0, policy_version 13238 (0.0011) -[2024-07-05 11:46:39,374][25826] Fps is (10 sec: 45056.0, 60 sec: 44783.1, 300 sec: 44625.5). Total num frames: 88489984. Throughput: 0: 11206.0. Samples: 4609480. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:46:39,375][25826] Avg episode reward: [(0, '43.726')] -[2024-07-05 11:46:40,010][43181] Updated weights for policy 0, policy_version 13248 (0.0008) -[2024-07-05 11:46:41,814][43181] Updated weights for policy 0, policy_version 13258 (0.0008) -[2024-07-05 11:46:43,651][43181] Updated weights for policy 0, policy_version 13268 (0.0011) -[2024-07-05 11:46:44,374][25826] Fps is (10 sec: 44236.9, 60 sec: 44782.9, 300 sec: 44625.6). Total num frames: 88711168. Throughput: 0: 11206.7. Samples: 4676680. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:46:44,375][25826] Avg episode reward: [(0, '44.039')] -[2024-07-05 11:46:45,474][43181] Updated weights for policy 0, policy_version 13278 (0.0008) -[2024-07-05 11:46:47,299][43181] Updated weights for policy 0, policy_version 13288 (0.0008) -[2024-07-05 11:46:48,961][43181] Updated weights for policy 0, policy_version 13298 (0.0008) -[2024-07-05 11:46:49,374][25826] Fps is (10 sec: 45875.8, 60 sec: 44919.5, 300 sec: 44708.9). Total num frames: 88948736. Throughput: 0: 11217.4. Samples: 4710796. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:46:49,375][25826] Avg episode reward: [(0, '44.631')] -[2024-07-05 11:46:50,701][43181] Updated weights for policy 0, policy_version 13308 (0.0008) -[2024-07-05 11:46:52,342][43181] Updated weights for policy 0, policy_version 13318 (0.0008) -[2024-07-05 11:46:53,988][43181] Updated weights for policy 0, policy_version 13328 (0.0008) -[2024-07-05 11:46:54,374][25826] Fps is (10 sec: 48332.5, 60 sec: 45329.2, 300 sec: 44792.2). Total num frames: 89194496. Throughput: 0: 11345.8. Samples: 4783608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:46:54,375][25826] Avg episode reward: [(0, '48.287')] -[2024-07-05 11:46:55,671][43181] Updated weights for policy 0, policy_version 13338 (0.0007) -[2024-07-05 11:46:57,402][43181] Updated weights for policy 0, policy_version 13348 (0.0008) -[2024-07-05 11:46:59,132][43181] Updated weights for policy 0, policy_version 13358 (0.0008) -[2024-07-05 11:46:59,374][25826] Fps is (10 sec: 48332.8, 60 sec: 45602.3, 300 sec: 44820.0). Total num frames: 89432064. Throughput: 0: 11462.1. Samples: 4856168. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:46:59,375][25826] Avg episode reward: [(0, '46.450')] -[2024-07-05 11:47:00,801][43181] Updated weights for policy 0, policy_version 13368 (0.0008) -[2024-07-05 11:47:02,500][43181] Updated weights for policy 0, policy_version 13378 (0.0011) -[2024-07-05 11:47:04,206][43181] Updated weights for policy 0, policy_version 13388 (0.0008) -[2024-07-05 11:47:04,374][25826] Fps is (10 sec: 47513.9, 60 sec: 45875.2, 300 sec: 44875.5). Total num frames: 89669632. Throughput: 0: 11510.4. Samples: 4892152. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:04,375][25826] Avg episode reward: [(0, '47.032')] -[2024-07-05 11:47:05,918][43181] Updated weights for policy 0, policy_version 13398 (0.0008) -[2024-07-05 11:47:07,616][43181] Updated weights for policy 0, policy_version 13408 (0.0010) -[2024-07-05 11:47:09,336][43181] Updated weights for policy 0, policy_version 13418 (0.0009) -[2024-07-05 11:47:09,374][25826] Fps is (10 sec: 48332.6, 60 sec: 46148.2, 300 sec: 44931.0). Total num frames: 89915392. Throughput: 0: 11619.0. Samples: 4964464. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:09,383][25826] Avg episode reward: [(0, '45.376')] -[2024-07-05 11:47:11,027][43181] Updated weights for policy 0, policy_version 13428 (0.0008) -[2024-07-05 11:47:12,700][43181] Updated weights for policy 0, policy_version 13438 (0.0009) -[2024-07-05 11:47:14,374][25826] Fps is (10 sec: 48332.1, 60 sec: 46421.2, 300 sec: 44986.6). Total num frames: 90152960. Throughput: 0: 11738.8. Samples: 5036748. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:14,375][25826] Avg episode reward: [(0, '47.442')] -[2024-07-05 11:47:14,387][43181] Updated weights for policy 0, policy_version 13448 (0.0008) -[2024-07-05 11:47:16,099][43181] Updated weights for policy 0, policy_version 13458 (0.0009) -[2024-07-05 11:47:17,796][43181] Updated weights for policy 0, policy_version 13468 (0.0008) -[2024-07-05 11:47:19,374][25826] Fps is (10 sec: 47513.8, 60 sec: 46557.9, 300 sec: 45014.4). Total num frames: 90390528. Throughput: 0: 11790.9. Samples: 5073064. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:47:19,375][25826] Avg episode reward: [(0, '44.089')] -[2024-07-05 11:47:19,537][43181] Updated weights for policy 0, policy_version 13478 (0.0008) -[2024-07-05 11:47:21,348][43181] Updated weights for policy 0, policy_version 13488 (0.0010) -[2024-07-05 11:47:23,070][43181] Updated weights for policy 0, policy_version 13498 (0.0010) -[2024-07-05 11:47:24,374][25826] Fps is (10 sec: 47514.2, 60 sec: 46830.9, 300 sec: 45070.0). Total num frames: 90628096. Throughput: 0: 11872.3. Samples: 5143732. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:47:24,375][25826] Avg episode reward: [(0, '47.093')] -[2024-07-05 11:47:24,713][43181] Updated weights for policy 0, policy_version 13508 (0.0008) -[2024-07-05 11:47:26,420][43181] Updated weights for policy 0, policy_version 13518 (0.0011) -[2024-07-05 11:47:28,105][43181] Updated weights for policy 0, policy_version 13528 (0.0008) -[2024-07-05 11:47:29,374][25826] Fps is (10 sec: 48332.5, 60 sec: 47240.6, 300 sec: 45125.4). Total num frames: 90873856. Throughput: 0: 12006.2. Samples: 5216960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:29,375][25826] Avg episode reward: [(0, '47.928')] -[2024-07-05 11:47:29,389][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000013535_90873856.pth... -[2024-07-05 11:47:29,470][43161] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000012212_80035840.pth -[2024-07-05 11:47:29,788][43181] Updated weights for policy 0, policy_version 13538 (0.0009) -[2024-07-05 11:47:31,503][43181] Updated weights for policy 0, policy_version 13548 (0.0007) -[2024-07-05 11:47:33,215][43181] Updated weights for policy 0, policy_version 13558 (0.0008) -[2024-07-05 11:47:34,383][25826] Fps is (10 sec: 48332.3, 60 sec: 47377.0, 300 sec: 45180.9). Total num frames: 91111424. Throughput: 0: 12047.4. Samples: 5252928. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:34,385][25826] Avg episode reward: [(0, '47.344')] -[2024-07-05 11:47:34,961][43181] Updated weights for policy 0, policy_version 13568 (0.0008) -[2024-07-05 11:47:36,600][43181] Updated weights for policy 0, policy_version 13578 (0.0008) -[2024-07-05 11:47:38,269][43181] Updated weights for policy 0, policy_version 13588 (0.0007) -[2024-07-05 11:47:39,374][25826] Fps is (10 sec: 48332.8, 60 sec: 47786.7, 300 sec: 45236.5). Total num frames: 91357184. Throughput: 0: 12032.3. Samples: 5325060. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:39,375][25826] Avg episode reward: [(0, '42.650')] -[2024-07-05 11:47:39,898][43181] Updated weights for policy 0, policy_version 13598 (0.0008) -[2024-07-05 11:47:41,568][43181] Updated weights for policy 0, policy_version 13608 (0.0010) -[2024-07-05 11:47:43,286][43181] Updated weights for policy 0, policy_version 13618 (0.0008) -[2024-07-05 11:47:44,374][25826] Fps is (10 sec: 49152.4, 60 sec: 48196.2, 300 sec: 45319.8). Total num frames: 91602944. Throughput: 0: 12056.5. Samples: 5398712. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:44,376][25826] Avg episode reward: [(0, '47.568')] -[2024-07-05 11:47:44,948][43181] Updated weights for policy 0, policy_version 13628 (0.0008) -[2024-07-05 11:47:46,744][43181] Updated weights for policy 0, policy_version 13638 (0.0008) -[2024-07-05 11:47:48,485][43181] Updated weights for policy 0, policy_version 13648 (0.0007) -[2024-07-05 11:47:49,374][25826] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 45375.3). Total num frames: 91840512. Throughput: 0: 12043.2. Samples: 5434096. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:49,375][25826] Avg episode reward: [(0, '45.992')] -[2024-07-05 11:47:50,173][43181] Updated weights for policy 0, policy_version 13658 (0.0009) -[2024-07-05 11:47:51,880][43181] Updated weights for policy 0, policy_version 13668 (0.0008) -[2024-07-05 11:47:53,681][43181] Updated weights for policy 0, policy_version 13678 (0.0008) -[2024-07-05 11:47:54,374][25826] Fps is (10 sec: 47513.7, 60 sec: 48059.8, 300 sec: 45403.1). Total num frames: 92078080. Throughput: 0: 12005.4. Samples: 5504704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:47:54,375][25826] Avg episode reward: [(0, '44.694')] -[2024-07-05 11:47:55,386][43181] Updated weights for policy 0, policy_version 13688 (0.0010) -[2024-07-05 11:47:57,087][43181] Updated weights for policy 0, policy_version 13698 (0.0010) -[2024-07-05 11:47:58,808][43181] Updated weights for policy 0, policy_version 13708 (0.0008) -[2024-07-05 11:47:59,374][25826] Fps is (10 sec: 47513.9, 60 sec: 48059.7, 300 sec: 45430.9). Total num frames: 92315648. Throughput: 0: 11998.9. Samples: 5576700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:47:59,375][25826] Avg episode reward: [(0, '42.736')] -[2024-07-05 11:48:00,479][43181] Updated weights for policy 0, policy_version 13718 (0.0007) -[2024-07-05 11:48:02,182][43181] Updated weights for policy 0, policy_version 13728 (0.0008) -[2024-07-05 11:48:03,900][43181] Updated weights for policy 0, policy_version 13738 (0.0008) -[2024-07-05 11:48:04,374][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 45514.2). Total num frames: 92561408. Throughput: 0: 11997.8. Samples: 5612964. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:48:04,375][25826] Avg episode reward: [(0, '45.618')] -[2024-07-05 11:48:05,600][43181] Updated weights for policy 0, policy_version 13748 (0.0010) -[2024-07-05 11:48:07,310][43181] Updated weights for policy 0, policy_version 13758 (0.0008) -[2024-07-05 11:48:08,954][43181] Updated weights for policy 0, policy_version 13768 (0.0010) -[2024-07-05 11:48:09,374][25826] Fps is (10 sec: 48332.8, 60 sec: 48059.7, 300 sec: 45569.7). Total num frames: 92798976. Throughput: 0: 12028.6. Samples: 5685020. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:48:09,375][25826] Avg episode reward: [(0, '45.312')] -[2024-07-05 11:48:10,676][43181] Updated weights for policy 0, policy_version 13778 (0.0007) -[2024-07-05 11:48:12,383][43181] Updated weights for policy 0, policy_version 13788 (0.0008) -[2024-07-05 11:48:14,055][43181] Updated weights for policy 0, policy_version 13798 (0.0007) -[2024-07-05 11:48:14,374][25826] Fps is (10 sec: 47513.6, 60 sec: 48059.9, 300 sec: 45597.5). Total num frames: 93036544. Throughput: 0: 12018.4. Samples: 5757788. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:48:14,375][25826] Avg episode reward: [(0, '45.691')] -[2024-07-05 11:48:15,698][43181] Updated weights for policy 0, policy_version 13808 (0.0010) -[2024-07-05 11:48:17,330][43181] Updated weights for policy 0, policy_version 13818 (0.0008) -[2024-07-05 11:48:18,994][43181] Updated weights for policy 0, policy_version 13828 (0.0009) -[2024-07-05 11:48:19,374][25826] Fps is (10 sec: 49152.2, 60 sec: 48332.8, 300 sec: 45708.6). Total num frames: 93290496. Throughput: 0: 12048.8. Samples: 5795124. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:48:19,375][25826] Avg episode reward: [(0, '44.856')] -[2024-07-05 11:48:20,672][43181] Updated weights for policy 0, policy_version 13838 (0.0008) -[2024-07-05 11:48:22,312][43181] Updated weights for policy 0, policy_version 13848 (0.0007) -[2024-07-05 11:48:23,941][43181] Updated weights for policy 0, policy_version 13858 (0.0008) -[2024-07-05 11:48:24,374][25826] Fps is (10 sec: 49970.9, 60 sec: 48469.3, 300 sec: 45764.1). Total num frames: 93536256. Throughput: 0: 12104.3. Samples: 5869752. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:48:24,375][25826] Avg episode reward: [(0, '45.133')] -[2024-07-05 11:48:25,591][43181] Updated weights for policy 0, policy_version 13868 (0.0008) -[2024-07-05 11:48:27,216][43181] Updated weights for policy 0, policy_version 13878 (0.0012) -[2024-07-05 11:48:28,906][43181] Updated weights for policy 0, policy_version 13888 (0.0008) -[2024-07-05 11:48:29,374][25826] Fps is (10 sec: 49971.2, 60 sec: 48605.9, 300 sec: 45875.2). Total num frames: 93790208. Throughput: 0: 12127.3. Samples: 5944440. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:48:29,375][25826] Avg episode reward: [(0, '47.068')] -[2024-07-05 11:48:30,498][43181] Updated weights for policy 0, policy_version 13898 (0.0007) -[2024-07-05 11:48:32,203][43181] Updated weights for policy 0, policy_version 13908 (0.0009) -[2024-07-05 11:48:33,853][43181] Updated weights for policy 0, policy_version 13918 (0.0007) -[2024-07-05 11:48:34,374][25826] Fps is (10 sec: 49971.4, 60 sec: 48742.5, 300 sec: 45930.8). Total num frames: 94035968. Throughput: 0: 12166.6. Samples: 5981592. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:48:34,375][25826] Avg episode reward: [(0, '44.467')] -[2024-07-05 11:48:35,489][43181] Updated weights for policy 0, policy_version 13928 (0.0008) -[2024-07-05 11:48:37,153][43181] Updated weights for policy 0, policy_version 13938 (0.0008) -[2024-07-05 11:48:38,826][43181] Updated weights for policy 0, policy_version 13948 (0.0008) -[2024-07-05 11:48:39,374][25826] Fps is (10 sec: 49152.1, 60 sec: 48742.5, 300 sec: 46014.0). Total num frames: 94281728. Throughput: 0: 12248.6. Samples: 6055892. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:48:39,375][25826] Avg episode reward: [(0, '48.987')] -[2024-07-05 11:48:39,378][43161] Saving new best policy, reward=48.987! -[2024-07-05 11:48:40,464][43181] Updated weights for policy 0, policy_version 13958 (0.0007) -[2024-07-05 11:48:42,117][43181] Updated weights for policy 0, policy_version 13968 (0.0008) -[2024-07-05 11:48:43,782][43181] Updated weights for policy 0, policy_version 13978 (0.0009) -[2024-07-05 11:48:44,374][25826] Fps is (10 sec: 49151.8, 60 sec: 48742.4, 300 sec: 46097.4). Total num frames: 94527488. Throughput: 0: 12293.7. Samples: 6129916. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:48:44,375][25826] Avg episode reward: [(0, '45.596')] -[2024-07-05 11:48:45,499][43181] Updated weights for policy 0, policy_version 13988 (0.0007) -[2024-07-05 11:48:47,177][43181] Updated weights for policy 0, policy_version 13998 (0.0008) -[2024-07-05 11:48:48,874][43181] Updated weights for policy 0, policy_version 14008 (0.0008) -[2024-07-05 11:48:49,374][25826] Fps is (10 sec: 48332.9, 60 sec: 48742.5, 300 sec: 46125.1). Total num frames: 94765056. Throughput: 0: 12287.8. Samples: 6165916. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:48:49,375][25826] Avg episode reward: [(0, '47.567')] -[2024-07-05 11:48:50,598][43181] Updated weights for policy 0, policy_version 14018 (0.0008) -[2024-07-05 11:48:52,376][43181] Updated weights for policy 0, policy_version 14028 (0.0008) -[2024-07-05 11:48:54,041][43181] Updated weights for policy 0, policy_version 14038 (0.0010) -[2024-07-05 11:48:54,374][25826] Fps is (10 sec: 47513.9, 60 sec: 48742.5, 300 sec: 46180.7). Total num frames: 95002624. Throughput: 0: 12278.4. Samples: 6237544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:48:54,375][25826] Avg episode reward: [(0, '46.962')] -[2024-07-05 11:48:55,706][43181] Updated weights for policy 0, policy_version 14048 (0.0010) -[2024-07-05 11:48:57,371][43181] Updated weights for policy 0, policy_version 14058 (0.0008) -[2024-07-05 11:48:59,033][43181] Updated weights for policy 0, policy_version 14068 (0.0008) -[2024-07-05 11:48:59,374][25826] Fps is (10 sec: 48332.5, 60 sec: 48879.0, 300 sec: 46264.0). Total num frames: 95248384. Throughput: 0: 12297.6. Samples: 6311180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:48:59,375][25826] Avg episode reward: [(0, '45.485')] -[2024-07-05 11:49:00,740][43181] Updated weights for policy 0, policy_version 14078 (0.0007) -[2024-07-05 11:49:02,387][43181] Updated weights for policy 0, policy_version 14088 (0.0007) -[2024-07-05 11:49:04,119][43181] Updated weights for policy 0, policy_version 14098 (0.0007) -[2024-07-05 11:49:04,374][25826] Fps is (10 sec: 49151.6, 60 sec: 48878.9, 300 sec: 46291.7). Total num frames: 95494144. Throughput: 0: 12276.3. Samples: 6347556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:49:04,375][25826] Avg episode reward: [(0, '45.954')] -[2024-07-05 11:49:05,826][43181] Updated weights for policy 0, policy_version 14108 (0.0008) -[2024-07-05 11:49:07,537][43181] Updated weights for policy 0, policy_version 14118 (0.0008) -[2024-07-05 11:49:09,183][43181] Updated weights for policy 0, policy_version 14128 (0.0007) -[2024-07-05 11:49:09,374][25826] Fps is (10 sec: 48333.3, 60 sec: 48879.0, 300 sec: 46375.1). Total num frames: 95731712. Throughput: 0: 12221.3. Samples: 6419712. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:49:09,374][25826] Avg episode reward: [(0, '44.329')] -[2024-07-05 11:49:11,060][43181] Updated weights for policy 0, policy_version 14138 (0.0011) -[2024-07-05 11:49:12,831][43181] Updated weights for policy 0, policy_version 14148 (0.0011) -[2024-07-05 11:49:14,375][25826] Fps is (10 sec: 47508.6, 60 sec: 48878.0, 300 sec: 46486.0). Total num frames: 95969280. Throughput: 0: 12116.6. Samples: 6489700. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:49:14,376][25826] Avg episode reward: [(0, '43.705')] -[2024-07-05 11:49:14,523][43181] Updated weights for policy 0, policy_version 14158 (0.0008) -[2024-07-05 11:49:16,204][43181] Updated weights for policy 0, policy_version 14168 (0.0008) -[2024-07-05 11:49:17,875][43181] Updated weights for policy 0, policy_version 14178 (0.0008) -[2024-07-05 11:49:19,374][25826] Fps is (10 sec: 47512.8, 60 sec: 48605.8, 300 sec: 46541.7). Total num frames: 96206848. Throughput: 0: 12096.4. Samples: 6525932. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:19,375][25826] Avg episode reward: [(0, '43.420')] -[2024-07-05 11:49:19,659][43181] Updated weights for policy 0, policy_version 14188 (0.0012) -[2024-07-05 11:49:21,440][43181] Updated weights for policy 0, policy_version 14198 (0.0007) -[2024-07-05 11:49:23,284][43181] Updated weights for policy 0, policy_version 14208 (0.0007) -[2024-07-05 11:49:24,374][25826] Fps is (10 sec: 45880.1, 60 sec: 48196.3, 300 sec: 46569.4). Total num frames: 96428032. Throughput: 0: 11977.4. Samples: 6594876. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:24,376][25826] Avg episode reward: [(0, '44.376')] -[2024-07-05 11:49:25,250][43181] Updated weights for policy 0, policy_version 14218 (0.0008) -[2024-07-05 11:49:27,107][43181] Updated weights for policy 0, policy_version 14228 (0.0010) -[2024-07-05 11:49:28,942][43181] Updated weights for policy 0, policy_version 14238 (0.0008) -[2024-07-05 11:49:29,374][25826] Fps is (10 sec: 44237.3, 60 sec: 47650.2, 300 sec: 46597.2). Total num frames: 96649216. Throughput: 0: 11782.7. Samples: 6660136. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:29,375][25826] Avg episode reward: [(0, '45.186')] -[2024-07-05 11:49:29,380][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014240_96649216.pth... -[2024-07-05 11:49:29,469][43161] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000012862_85360640.pth -[2024-07-05 11:49:30,976][43181] Updated weights for policy 0, policy_version 14248 (0.0015) -[2024-07-05 11:49:32,823][43181] Updated weights for policy 0, policy_version 14258 (0.0015) -[2024-07-05 11:49:34,375][25826] Fps is (10 sec: 43417.1, 60 sec: 47103.9, 300 sec: 46569.4). Total num frames: 96862208. Throughput: 0: 11685.9. Samples: 6691784. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:34,386][25826] Avg episode reward: [(0, '43.217')] -[2024-07-05 11:49:34,670][43181] Updated weights for policy 0, policy_version 14268 (0.0009) -[2024-07-05 11:49:36,384][43181] Updated weights for policy 0, policy_version 14278 (0.0008) -[2024-07-05 11:49:38,103][43181] Updated weights for policy 0, policy_version 14288 (0.0008) -[2024-07-05 11:49:39,374][25826] Fps is (10 sec: 45055.5, 60 sec: 46967.4, 300 sec: 46625.0). Total num frames: 97099776. Throughput: 0: 11628.8. Samples: 6760840. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:39,375][25826] Avg episode reward: [(0, '47.114')] -[2024-07-05 11:49:39,854][43181] Updated weights for policy 0, policy_version 14298 (0.0009) -[2024-07-05 11:49:41,666][43181] Updated weights for policy 0, policy_version 14308 (0.0008) -[2024-07-05 11:49:43,494][43181] Updated weights for policy 0, policy_version 14318 (0.0011) -[2024-07-05 11:49:44,375][25826] Fps is (10 sec: 45875.3, 60 sec: 46557.8, 300 sec: 46625.0). Total num frames: 97320960. Throughput: 0: 11490.7. Samples: 6828264. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:44,383][25826] Avg episode reward: [(0, '48.554')] -[2024-07-05 11:49:45,473][43181] Updated weights for policy 0, policy_version 14328 (0.0008) -[2024-07-05 11:49:47,367][43181] Updated weights for policy 0, policy_version 14338 (0.0008) -[2024-07-05 11:49:49,248][43181] Updated weights for policy 0, policy_version 14348 (0.0011) -[2024-07-05 11:49:49,374][25826] Fps is (10 sec: 43418.4, 60 sec: 46148.3, 300 sec: 46569.5). Total num frames: 97533952. Throughput: 0: 11393.9. Samples: 6860280. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:49:49,380][25826] Avg episode reward: [(0, '47.141')] -[2024-07-05 11:49:51,156][43181] Updated weights for policy 0, policy_version 14358 (0.0009) -[2024-07-05 11:49:53,081][43181] Updated weights for policy 0, policy_version 14368 (0.0008) -[2024-07-05 11:49:54,375][25826] Fps is (10 sec: 42598.3, 60 sec: 45738.5, 300 sec: 46541.7). Total num frames: 97746944. Throughput: 0: 11221.3. Samples: 6924672. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:49:54,377][25826] Avg episode reward: [(0, '46.710')] -[2024-07-05 11:49:55,060][43181] Updated weights for policy 0, policy_version 14378 (0.0009) -[2024-07-05 11:49:56,999][43181] Updated weights for policy 0, policy_version 14388 (0.0009) -[2024-07-05 11:49:58,919][43181] Updated weights for policy 0, policy_version 14398 (0.0008) -[2024-07-05 11:49:59,374][25826] Fps is (10 sec: 42598.2, 60 sec: 45192.6, 300 sec: 46513.9). Total num frames: 97959936. Throughput: 0: 11072.1. Samples: 6987932. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:49:59,376][25826] Avg episode reward: [(0, '46.326')] -[2024-07-05 11:50:00,794][43181] Updated weights for policy 0, policy_version 14408 (0.0008) -[2024-07-05 11:50:02,659][43181] Updated weights for policy 0, policy_version 14418 (0.0010) -[2024-07-05 11:50:04,377][25826] Fps is (10 sec: 43417.2, 60 sec: 44782.8, 300 sec: 46486.4). Total num frames: 98181120. Throughput: 0: 11010.3. Samples: 7021396. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:50:04,388][25826] Avg episode reward: [(0, '43.823')] -[2024-07-05 11:50:04,500][43181] Updated weights for policy 0, policy_version 14428 (0.0012) -[2024-07-05 11:50:06,316][43181] Updated weights for policy 0, policy_version 14438 (0.0008) -[2024-07-05 11:50:08,141][43181] Updated weights for policy 0, policy_version 14448 (0.0008) -[2024-07-05 11:50:09,375][25826] Fps is (10 sec: 44236.8, 60 sec: 44509.8, 300 sec: 46513.9). Total num frames: 98402304. Throughput: 0: 10949.3. Samples: 7087596. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:50:09,385][25826] Avg episode reward: [(0, '46.142')] -[2024-07-05 11:50:10,206][43181] Updated weights for policy 0, policy_version 14458 (0.0014) -[2024-07-05 11:50:12,116][43181] Updated weights for policy 0, policy_version 14468 (0.0009) -[2024-07-05 11:50:13,984][43181] Updated weights for policy 0, policy_version 14478 (0.0010) -[2024-07-05 11:50:14,375][25826] Fps is (10 sec: 42599.3, 60 sec: 43964.5, 300 sec: 46458.4). Total num frames: 98607104. Throughput: 0: 10913.4. Samples: 7151240. Policy #0 lag: (min: 0.0, avg: 1.5, max: 4.0) -[2024-07-05 11:50:14,378][25826] Avg episode reward: [(0, '46.083')] -[2024-07-05 11:50:15,893][43181] Updated weights for policy 0, policy_version 14488 (0.0009) -[2024-07-05 11:50:17,735][43181] Updated weights for policy 0, policy_version 14498 (0.0008) -[2024-07-05 11:50:19,375][25826] Fps is (10 sec: 42598.4, 60 sec: 43690.8, 300 sec: 46430.6). Total num frames: 98828288. Throughput: 0: 10945.4. Samples: 7184324. Policy #0 lag: (min: 0.0, avg: 1.5, max: 4.0) -[2024-07-05 11:50:19,384][25826] Avg episode reward: [(0, '47.002')] -[2024-07-05 11:50:19,630][43181] Updated weights for policy 0, policy_version 14508 (0.0011) -[2024-07-05 11:50:21,471][43181] Updated weights for policy 0, policy_version 14518 (0.0009) -[2024-07-05 11:50:23,306][43181] Updated weights for policy 0, policy_version 14528 (0.0008) -[2024-07-05 11:50:24,375][25826] Fps is (10 sec: 43417.2, 60 sec: 43554.1, 300 sec: 46402.8). Total num frames: 99041280. Throughput: 0: 10862.7. Samples: 7249660. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:50:24,388][25826] Avg episode reward: [(0, '43.784')] -[2024-07-05 11:50:25,391][43181] Updated weights for policy 0, policy_version 14538 (0.0028) -[2024-07-05 11:50:27,508][43181] Updated weights for policy 0, policy_version 14548 (0.0021) -[2024-07-05 11:50:29,375][25826] Fps is (10 sec: 41779.5, 60 sec: 43281.1, 300 sec: 46319.5). Total num frames: 99246080. Throughput: 0: 10732.8. Samples: 7311240. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:50:29,386][25826] Avg episode reward: [(0, '46.946')] -[2024-07-05 11:50:29,391][43181] Updated weights for policy 0, policy_version 14558 (0.0012) -[2024-07-05 11:50:31,193][43181] Updated weights for policy 0, policy_version 14568 (0.0010) -[2024-07-05 11:50:33,412][43181] Updated weights for policy 0, policy_version 14578 (0.0027) -[2024-07-05 11:50:34,375][25826] Fps is (10 sec: 41779.4, 60 sec: 43281.1, 300 sec: 46291.8). Total num frames: 99459072. Throughput: 0: 10727.8. Samples: 7343032. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:50:34,385][25826] Avg episode reward: [(0, '43.723')] -[2024-07-05 11:50:35,192][43181] Updated weights for policy 0, policy_version 14588 (0.0015) -[2024-07-05 11:50:37,125][43181] Updated weights for policy 0, policy_version 14598 (0.0011) -[2024-07-05 11:50:38,999][43181] Updated weights for policy 0, policy_version 14608 (0.0012) -[2024-07-05 11:50:39,374][25826] Fps is (10 sec: 42598.4, 60 sec: 42871.6, 300 sec: 46264.0). Total num frames: 99672064. Throughput: 0: 10703.6. Samples: 7406332. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:50:39,376][25826] Avg episode reward: [(0, '47.587')] -[2024-07-05 11:50:40,902][43181] Updated weights for policy 0, policy_version 14618 (0.0011) -[2024-07-05 11:50:42,887][43181] Updated weights for policy 0, policy_version 14628 (0.0019) -[2024-07-05 11:50:44,375][25826] Fps is (10 sec: 42598.6, 60 sec: 42735.0, 300 sec: 46208.5). Total num frames: 99885056. Throughput: 0: 10703.6. Samples: 7469596. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:50:44,385][25826] Avg episode reward: [(0, '45.807')] -[2024-07-05 11:50:45,078][43181] Updated weights for policy 0, policy_version 14638 (0.0022) -[2024-07-05 11:50:46,983][43181] Updated weights for policy 0, policy_version 14648 (0.0011) -[2024-07-05 11:50:47,590][43161] Stopping Batcher_0... -[2024-07-05 11:50:47,592][43161] Loop batcher_evt_loop terminating... -[2024-07-05 11:50:47,595][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014651_100016128.pth... -[2024-07-05 11:50:47,590][25826] Component Batcher_0 stopped! -[2024-07-05 11:50:47,638][43181] Weights refcount: 2 0 -[2024-07-05 11:50:47,642][43181] Stopping InferenceWorker_p0-w0... -[2024-07-05 11:50:47,642][43181] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 11:50:47,642][25826] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 11:50:47,664][43212] Stopping RolloutWorker_w14... -[2024-07-05 11:50:47,665][43212] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 11:50:47,665][43207] Stopping RolloutWorker_w10... -[2024-07-05 11:50:47,665][43189] Stopping RolloutWorker_w8... -[2024-07-05 11:50:47,664][25826] Component RolloutWorker_w14 stopped! -[2024-07-05 11:50:47,666][43207] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 11:50:47,666][43189] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 11:50:47,666][43187] Stopping RolloutWorker_w5... -[2024-07-05 11:50:47,666][43211] Stopping RolloutWorker_w15... -[2024-07-05 11:50:47,666][43187] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 11:50:47,666][43211] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 11:50:47,667][43210] Stopping RolloutWorker_w13... -[2024-07-05 11:50:47,667][43210] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 11:50:47,668][43184] Stopping RolloutWorker_w2... -[2024-07-05 11:50:47,668][43184] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 11:50:47,666][25826] Component RolloutWorker_w8 stopped! -[2024-07-05 11:50:47,669][43206] Stopping RolloutWorker_w9... -[2024-07-05 11:50:47,670][43206] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 11:50:47,670][43185] Stopping RolloutWorker_w3... -[2024-07-05 11:50:47,670][43209] Stopping RolloutWorker_w12... -[2024-07-05 11:50:47,670][43186] Stopping RolloutWorker_w4... -[2024-07-05 11:50:47,670][43209] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 11:50:47,670][43185] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 11:50:47,669][25826] Component RolloutWorker_w10 stopped! -[2024-07-05 11:50:47,671][43186] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 11:50:47,671][25826] Component RolloutWorker_w5 stopped! -[2024-07-05 11:50:47,671][43188] Stopping RolloutWorker_w7... -[2024-07-05 11:50:47,672][25826] Component RolloutWorker_w15 stopped! -[2024-07-05 11:50:47,673][43183] Stopping RolloutWorker_w1... -[2024-07-05 11:50:47,674][43183] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 11:50:47,673][25826] Component RolloutWorker_w13 stopped! -[2024-07-05 11:50:47,674][43190] Stopping RolloutWorker_w6... -[2024-07-05 11:50:47,674][25826] Component RolloutWorker_w2 stopped! -[2024-07-05 11:50:47,674][43190] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 11:50:47,674][43188] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 11:50:47,675][25826] Component RolloutWorker_w9 stopped! -[2024-07-05 11:50:47,675][25826] Component RolloutWorker_w3 stopped! -[2024-07-05 11:50:47,676][25826] Component RolloutWorker_w12 stopped! -[2024-07-05 11:50:47,677][25826] Component RolloutWorker_w4 stopped! -[2024-07-05 11:50:47,678][25826] Component RolloutWorker_w7 stopped! -[2024-07-05 11:50:47,678][25826] Component RolloutWorker_w1 stopped! -[2024-07-05 11:50:47,679][25826] Component RolloutWorker_w6 stopped! -[2024-07-05 11:50:47,680][25826] Component RolloutWorker_w0 stopped! -[2024-07-05 11:50:47,679][43182] Stopping RolloutWorker_w0... -[2024-07-05 11:50:47,685][43161] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000013535_90873856.pth -[2024-07-05 11:50:47,684][43182] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 11:50:47,686][43208] Stopping RolloutWorker_w11... -[2024-07-05 11:50:47,687][43161] Saving new best policy, reward=49.263! -[2024-07-05 11:50:47,687][43208] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 11:50:47,686][25826] Component RolloutWorker_w11 stopped! -[2024-07-05 11:50:47,800][43161] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014651_100016128.pth... -[2024-07-05 11:50:47,919][25826] Component LearnerWorker_p0 stopped! -[2024-07-05 11:50:47,919][43161] Stopping LearnerWorker_p0... -[2024-07-05 11:50:47,923][43161] Loop learner_proc0_evt_loop terminating... -[2024-07-05 11:50:47,922][25826] Waiting for process learner_proc0 to stop... -[2024-07-05 11:50:49,436][25826] Waiting for process inference_proc0-0 to join... -[2024-07-05 11:50:49,437][25826] Waiting for process rollout_proc0 to join... -[2024-07-05 11:50:49,438][25826] Waiting for process rollout_proc1 to join... -[2024-07-05 11:50:49,438][25826] Waiting for process rollout_proc2 to join... -[2024-07-05 11:50:49,438][25826] Waiting for process rollout_proc3 to join... -[2024-07-05 11:50:49,439][25826] Waiting for process rollout_proc4 to join... -[2024-07-05 11:50:49,439][25826] Waiting for process rollout_proc5 to join... -[2024-07-05 11:50:49,439][25826] Waiting for process rollout_proc6 to join... -[2024-07-05 11:50:49,440][25826] Waiting for process rollout_proc7 to join... -[2024-07-05 11:50:49,440][25826] Waiting for process rollout_proc8 to join... -[2024-07-05 11:50:49,440][25826] Waiting for process rollout_proc9 to join... -[2024-07-05 11:50:49,440][25826] Waiting for process rollout_proc10 to join... -[2024-07-05 11:50:49,441][25826] Waiting for process rollout_proc11 to join... -[2024-07-05 11:50:49,441][25826] Waiting for process rollout_proc12 to join... -[2024-07-05 11:50:49,441][25826] Waiting for process rollout_proc13 to join... -[2024-07-05 11:50:49,441][25826] Waiting for process rollout_proc14 to join... -[2024-07-05 11:50:49,442][25826] Waiting for process rollout_proc15 to join... -[2024-07-05 11:50:49,442][25826] Batcher 0 profile tree view: -batching: 47.8943, releasing_batches: 0.0903 -[2024-07-05 11:50:49,442][25826] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0000 - wait_policy_total: 20.3000 -update_model: 10.6028 - weight_update: 0.0011 -one_step: 0.0026 - handle_policy_step: 615.0484 - deserialize: 50.6437, stack: 3.5444, obs_to_device_normalize: 146.8590, forward: 285.8822, send_messages: 30.1302 - prepare_outputs: 75.2306 - to_cpu: 44.7905 -[2024-07-05 11:50:49,442][25826] Learner 0 profile tree view: -misc: 0.0154, prepare_batch: 60.8018 -train: 136.9666 - epoch_init: 0.0124, minibatch_init: 0.0195, losses_postprocess: 0.8133, kl_divergence: 0.9458, after_optimizer: 1.0309 - calculate_losses: 49.2744 - losses_init: 0.0074, forward_head: 2.2622, bptt_initial: 37.5046, tail: 2.0034, advantages_returns: 0.5749, losses: 3.2175 - bptt: 3.1252 - bptt_forward_core: 2.9792 - update: 83.3315 - clip: 2.9031 -[2024-07-05 11:50:49,443][25826] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.2780, enqueue_policy_requests: 18.9559, env_step: 315.1499, overhead: 34.4416, complete_rollouts: 0.8598 -save_policy_outputs: 22.9399 - split_output_tensors: 10.5951 -[2024-07-05 11:50:49,443][25826] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.3024, enqueue_policy_requests: 19.7532, env_step: 320.4766, overhead: 34.4947, complete_rollouts: 0.9708 -save_policy_outputs: 23.2271 - split_output_tensors: 10.8595 -[2024-07-05 11:50:49,447][25826] Loop Runner_EvtLoop terminating... -[2024-07-05 11:50:49,447][25826] Runner profile tree view: -main_loop: 676.8932 -[2024-07-05 11:50:49,448][25826] Collected {0: 100016128}, FPS: 44330.9 -[2024-07-05 11:51:10,068][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:51:10,068][25826] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 11:51:10,069][25826] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 11:51:10,069][25826] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 11:51:10,070][25826] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:51:10,070][25826] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 11:51:10,070][25826] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 11:51:10,070][25826] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 11:51:10,070][25826] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 11:51:10,071][25826] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 11:51:10,071][25826] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 11:51:10,071][25826] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 11:51:10,071][25826] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 11:51:10,072][25826] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 11:51:10,072][25826] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 11:51:10,088][25826] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:51:10,089][25826] RunningMeanStd input shape: (1,) -[2024-07-05 11:51:10,103][25826] ConvEncoder: input_channels=3 -[2024-07-05 11:51:10,138][25826] Conv encoder output size: 512 -[2024-07-05 11:51:10,139][25826] Policy head output size: 512 -[2024-07-05 11:51:10,164][25826] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014651_100016128.pth... -[2024-07-05 11:51:10,514][25826] Num frames 100... -[2024-07-05 11:51:10,576][25826] Num frames 200... -[2024-07-05 11:51:10,638][25826] Num frames 300... -[2024-07-05 11:51:10,701][25826] Num frames 400... -[2024-07-05 11:51:10,763][25826] Num frames 500... -[2024-07-05 11:51:10,822][25826] Num frames 600... -[2024-07-05 11:51:10,884][25826] Num frames 700... -[2024-07-05 11:51:10,947][25826] Num frames 800... -[2024-07-05 11:51:11,007][25826] Num frames 900... -[2024-07-05 11:51:11,066][25826] Num frames 1000... -[2024-07-05 11:51:11,127][25826] Num frames 1100... -[2024-07-05 11:51:11,187][25826] Num frames 1200... -[2024-07-05 11:51:11,250][25826] Num frames 1300... -[2024-07-05 11:51:11,314][25826] Num frames 1400... -[2024-07-05 11:51:11,389][25826] Num frames 1500... -[2024-07-05 11:51:11,455][25826] Num frames 1600... -[2024-07-05 11:51:11,527][25826] Num frames 1700... -[2024-07-05 11:51:11,594][25826] Num frames 1800... -[2024-07-05 11:51:11,655][25826] Num frames 1900... -[2024-07-05 11:51:11,715][25826] Num frames 2000... -[2024-07-05 11:51:11,777][25826] Num frames 2100... -[2024-07-05 11:51:11,829][25826] Avg episode rewards: #0: 58.999, true rewards: #0: 21.000 -[2024-07-05 11:51:11,830][25826] Avg episode reward: 58.999, avg true_objective: 21.000 -[2024-07-05 11:51:11,895][25826] Num frames 2200... -[2024-07-05 11:51:11,954][25826] Num frames 2300... -[2024-07-05 11:51:12,012][25826] Num frames 2400... -[2024-07-05 11:51:12,073][25826] Num frames 2500... -[2024-07-05 11:51:12,132][25826] Num frames 2600... -[2024-07-05 11:51:12,195][25826] Num frames 2700... -[2024-07-05 11:51:12,267][25826] Num frames 2800... -[2024-07-05 11:51:12,328][25826] Num frames 2900... -[2024-07-05 11:51:12,389][25826] Num frames 3000... -[2024-07-05 11:51:12,450][25826] Num frames 3100... -[2024-07-05 11:51:12,513][25826] Num frames 3200... -[2024-07-05 11:51:12,571][25826] Num frames 3300... -[2024-07-05 11:51:12,632][25826] Num frames 3400... -[2024-07-05 11:51:12,693][25826] Num frames 3500... -[2024-07-05 11:51:12,753][25826] Num frames 3600... -[2024-07-05 11:51:12,815][25826] Num frames 3700... -[2024-07-05 11:51:12,885][25826] Num frames 3800... -[2024-07-05 11:51:12,946][25826] Num frames 3900... -[2024-07-05 11:51:13,008][25826] Num frames 4000... -[2024-07-05 11:51:13,079][25826] Num frames 4100... -[2024-07-05 11:51:13,145][25826] Num frames 4200... -[2024-07-05 11:51:13,198][25826] Avg episode rewards: #0: 56.499, true rewards: #0: 21.000 -[2024-07-05 11:51:13,198][25826] Avg episode reward: 56.499, avg true_objective: 21.000 -[2024-07-05 11:51:13,264][25826] Num frames 4300... -[2024-07-05 11:51:13,324][25826] Num frames 4400... -[2024-07-05 11:51:13,382][25826] Num frames 4500... -[2024-07-05 11:51:13,452][25826] Num frames 4600... -[2024-07-05 11:51:13,526][25826] Num frames 4700... -[2024-07-05 11:51:13,588][25826] Num frames 4800... -[2024-07-05 11:51:13,649][25826] Num frames 4900... -[2024-07-05 11:51:13,709][25826] Num frames 5000... -[2024-07-05 11:51:13,772][25826] Num frames 5100... -[2024-07-05 11:51:13,833][25826] Num frames 5200... -[2024-07-05 11:51:13,894][25826] Num frames 5300... -[2024-07-05 11:51:13,956][25826] Num frames 5400... -[2024-07-05 11:51:14,017][25826] Num frames 5500... -[2024-07-05 11:51:14,078][25826] Num frames 5600... -[2024-07-05 11:51:14,140][25826] Num frames 5700... -[2024-07-05 11:51:14,202][25826] Num frames 5800... -[2024-07-05 11:51:14,263][25826] Num frames 5900... -[2024-07-05 11:51:14,323][25826] Num frames 6000... -[2024-07-05 11:51:14,386][25826] Num frames 6100... -[2024-07-05 11:51:14,446][25826] Num frames 6200... -[2024-07-05 11:51:14,511][25826] Num frames 6300... -[2024-07-05 11:51:14,563][25826] Avg episode rewards: #0: 56.665, true rewards: #0: 21.000 -[2024-07-05 11:51:14,564][25826] Avg episode reward: 56.665, avg true_objective: 21.000 -[2024-07-05 11:51:14,629][25826] Num frames 6400... -[2024-07-05 11:51:14,690][25826] Num frames 6500... -[2024-07-05 11:51:14,750][25826] Num frames 6600... -[2024-07-05 11:51:14,810][25826] Num frames 6700... -[2024-07-05 11:51:14,868][25826] Num frames 6800... -[2024-07-05 11:51:14,930][25826] Num frames 6900... -[2024-07-05 11:51:14,988][25826] Num frames 7000... -[2024-07-05 11:51:15,049][25826] Num frames 7100... -[2024-07-05 11:51:15,110][25826] Num frames 7200... -[2024-07-05 11:51:15,173][25826] Num frames 7300... -[2024-07-05 11:51:15,234][25826] Num frames 7400... -[2024-07-05 11:51:15,295][25826] Num frames 7500... -[2024-07-05 11:51:15,356][25826] Num frames 7600... -[2024-07-05 11:51:15,421][25826] Num frames 7700... -[2024-07-05 11:51:15,485][25826] Num frames 7800... -[2024-07-05 11:51:15,550][25826] Num frames 7900... -[2024-07-05 11:51:15,612][25826] Num frames 8000... -[2024-07-05 11:51:15,673][25826] Num frames 8100... -[2024-07-05 11:51:15,734][25826] Num frames 8200... -[2024-07-05 11:51:15,800][25826] Num frames 8300... -[2024-07-05 11:51:15,864][25826] Num frames 8400... -[2024-07-05 11:51:15,916][25826] Avg episode rewards: #0: 56.749, true rewards: #0: 21.000 -[2024-07-05 11:51:15,917][25826] Avg episode reward: 56.749, avg true_objective: 21.000 -[2024-07-05 11:51:15,985][25826] Num frames 8500... -[2024-07-05 11:51:16,046][25826] Num frames 8600... -[2024-07-05 11:51:16,108][25826] Num frames 8700... -[2024-07-05 11:51:16,167][25826] Num frames 8800... -[2024-07-05 11:51:16,244][25826] Num frames 8900... -[2024-07-05 11:51:16,309][25826] Num frames 9000... -[2024-07-05 11:51:16,369][25826] Num frames 9100... -[2024-07-05 11:51:16,430][25826] Num frames 9200... -[2024-07-05 11:51:16,490][25826] Num frames 9300... -[2024-07-05 11:51:16,551][25826] Num frames 9400... -[2024-07-05 11:51:16,615][25826] Num frames 9500... -[2024-07-05 11:51:16,679][25826] Num frames 9600... -[2024-07-05 11:51:16,738][25826] Num frames 9700... -[2024-07-05 11:51:16,801][25826] Num frames 9800... -[2024-07-05 11:51:16,862][25826] Num frames 9900... -[2024-07-05 11:51:16,923][25826] Num frames 10000... -[2024-07-05 11:51:16,984][25826] Num frames 10100... -[2024-07-05 11:51:17,052][25826] Num frames 10200... -[2024-07-05 11:51:17,110][25826] Num frames 10300... -[2024-07-05 11:51:17,168][25826] Num frames 10400... -[2024-07-05 11:51:17,230][25826] Num frames 10500... -[2024-07-05 11:51:17,282][25826] Avg episode rewards: #0: 56.599, true rewards: #0: 21.000 -[2024-07-05 11:51:17,283][25826] Avg episode reward: 56.599, avg true_objective: 21.000 -[2024-07-05 11:51:17,346][25826] Num frames 10600... -[2024-07-05 11:51:17,405][25826] Num frames 10700... -[2024-07-05 11:51:17,466][25826] Num frames 10800... -[2024-07-05 11:51:17,529][25826] Num frames 10900... -[2024-07-05 11:51:17,587][25826] Num frames 11000... -[2024-07-05 11:51:17,645][25826] Num frames 11100... -[2024-07-05 11:51:17,705][25826] Num frames 11200... -[2024-07-05 11:51:17,763][25826] Num frames 11300... -[2024-07-05 11:51:17,820][25826] Num frames 11400... -[2024-07-05 11:51:17,878][25826] Num frames 11500... -[2024-07-05 11:51:17,937][25826] Num frames 11600... -[2024-07-05 11:51:17,999][25826] Num frames 11700... -[2024-07-05 11:51:18,062][25826] Num frames 11800... -[2024-07-05 11:51:18,130][25826] Num frames 11900... -[2024-07-05 11:51:18,189][25826] Avg episode rewards: #0: 52.012, true rewards: #0: 19.847 -[2024-07-05 11:51:18,190][25826] Avg episode reward: 52.012, avg true_objective: 19.847 -[2024-07-05 11:51:18,251][25826] Num frames 12000... -[2024-07-05 11:51:18,313][25826] Num frames 12100... -[2024-07-05 11:51:18,373][25826] Num frames 12200... -[2024-07-05 11:51:18,438][25826] Num frames 12300... -[2024-07-05 11:51:18,533][25826] Avg episode rewards: #0: 45.808, true rewards: #0: 17.666 -[2024-07-05 11:51:18,535][25826] Avg episode reward: 45.808, avg true_objective: 17.666 -[2024-07-05 11:51:18,563][25826] Num frames 12400... -[2024-07-05 11:51:18,623][25826] Num frames 12500... -[2024-07-05 11:51:18,683][25826] Num frames 12600... -[2024-07-05 11:51:18,743][25826] Num frames 12700... -[2024-07-05 11:51:18,801][25826] Num frames 12800... -[2024-07-05 11:51:18,860][25826] Num frames 12900... -[2024-07-05 11:51:18,920][25826] Num frames 13000... -[2024-07-05 11:51:18,978][25826] Num frames 13100... -[2024-07-05 11:51:19,039][25826] Num frames 13200... -[2024-07-05 11:51:19,100][25826] Num frames 13300... -[2024-07-05 11:51:19,163][25826] Num frames 13400... -[2024-07-05 11:51:19,224][25826] Num frames 13500... -[2024-07-05 11:51:19,283][25826] Num frames 13600... -[2024-07-05 11:51:19,343][25826] Num frames 13700... -[2024-07-05 11:51:19,415][25826] Num frames 13800... -[2024-07-05 11:51:19,475][25826] Num frames 13900... -[2024-07-05 11:51:19,537][25826] Num frames 14000... -[2024-07-05 11:51:19,599][25826] Num frames 14100... -[2024-07-05 11:51:19,659][25826] Num frames 14200... -[2024-07-05 11:51:19,721][25826] Num frames 14300... -[2024-07-05 11:51:19,782][25826] Num frames 14400... -[2024-07-05 11:51:19,877][25826] Avg episode rewards: #0: 46.832, true rewards: #0: 18.083 -[2024-07-05 11:51:19,878][25826] Avg episode reward: 46.832, avg true_objective: 18.083 -[2024-07-05 11:51:19,900][25826] Num frames 14500... -[2024-07-05 11:51:19,959][25826] Num frames 14600... -[2024-07-05 11:51:20,021][25826] Num frames 14700... -[2024-07-05 11:51:20,080][25826] Num frames 14800... -[2024-07-05 11:51:20,141][25826] Num frames 14900... -[2024-07-05 11:51:20,203][25826] Num frames 15000... -[2024-07-05 11:51:20,265][25826] Num frames 15100... -[2024-07-05 11:51:20,326][25826] Num frames 15200... -[2024-07-05 11:51:20,388][25826] Num frames 15300... -[2024-07-05 11:51:20,450][25826] Num frames 15400... -[2024-07-05 11:51:20,507][25826] Num frames 15500... -[2024-07-05 11:51:20,567][25826] Num frames 15600... -[2024-07-05 11:51:20,627][25826] Num frames 15700... -[2024-07-05 11:51:20,688][25826] Num frames 15800... -[2024-07-05 11:51:20,751][25826] Num frames 15900... -[2024-07-05 11:51:20,816][25826] Num frames 16000... -[2024-07-05 11:51:20,878][25826] Num frames 16100... -[2024-07-05 11:51:20,944][25826] Num frames 16200... -[2024-07-05 11:51:21,007][25826] Num frames 16300... -[2024-07-05 11:51:21,069][25826] Num frames 16400... -[2024-07-05 11:51:21,131][25826] Num frames 16500... -[2024-07-05 11:51:21,230][25826] Avg episode rewards: #0: 48.961, true rewards: #0: 18.407 -[2024-07-05 11:51:21,231][25826] Avg episode reward: 48.961, avg true_objective: 18.407 -[2024-07-05 11:51:21,261][25826] Num frames 16600... -[2024-07-05 11:51:21,322][25826] Num frames 16700... -[2024-07-05 11:51:21,383][25826] Num frames 16800... -[2024-07-05 11:51:21,448][25826] Num frames 16900... -[2024-07-05 11:51:21,512][25826] Num frames 17000... -[2024-07-05 11:51:21,574][25826] Num frames 17100... -[2024-07-05 11:51:21,636][25826] Num frames 17200... -[2024-07-05 11:51:21,697][25826] Num frames 17300... -[2024-07-05 11:51:21,759][25826] Num frames 17400... -[2024-07-05 11:51:21,822][25826] Num frames 17500... -[2024-07-05 11:51:21,885][25826] Num frames 17600... -[2024-07-05 11:51:21,947][25826] Num frames 17700... -[2024-07-05 11:51:22,007][25826] Num frames 17800... -[2024-07-05 11:51:22,067][25826] Num frames 17900... -[2024-07-05 11:51:22,129][25826] Num frames 18000... -[2024-07-05 11:51:22,190][25826] Num frames 18100... -[2024-07-05 11:51:22,251][25826] Num frames 18200... -[2024-07-05 11:51:22,313][25826] Num frames 18300... -[2024-07-05 11:51:22,374][25826] Num frames 18400... -[2024-07-05 11:51:22,447][25826] Num frames 18500... -[2024-07-05 11:51:22,511][25826] Num frames 18600... -[2024-07-05 11:51:22,607][25826] Avg episode rewards: #0: 49.665, true rewards: #0: 18.666 -[2024-07-05 11:51:22,609][25826] Avg episode reward: 49.665, avg true_objective: 18.666 -[2024-07-05 11:51:42,222][25826] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 11:53:01,381][25826] Environment doom_basic already registered, overwriting... -[2024-07-05 11:53:01,382][25826] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 11:53:01,382][25826] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 11:53:01,382][25826] Environment doom_dm already registered, overwriting... -[2024-07-05 11:53:01,383][25826] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 11:53:01,383][25826] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 11:53:01,383][25826] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 11:53:01,383][25826] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 11:53:01,384][25826] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 11:53:01,384][25826] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 11:53:01,384][25826] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 11:53:01,384][25826] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 11:53:01,385][25826] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 11:53:01,385][25826] Environment doom_battle already registered, overwriting... -[2024-07-05 11:53:01,385][25826] Environment doom_battle2 already registered, overwriting... -[2024-07-05 11:53:01,386][25826] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 11:53:01,386][25826] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 11:53:01,386][25826] Environment doom_duel already registered, overwriting... -[2024-07-05 11:53:01,386][25826] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 11:53:01,387][25826] Environment doom_benchmark already registered, overwriting... -[2024-07-05 11:53:01,387][25826] register_encoder_factory: -[2024-07-05 11:53:01,392][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 11:53:01,392][25826] Overriding arg 'train_for_env_steps' with value 150000000 passed from command line -[2024-07-05 11:53:01,396][25826] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 11:53:01,397][25826] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 11:53:01,397][25826] Weights and Biases integration disabled -[2024-07-05 11:53:01,400][25826] Environment var CUDA_VISIBLE_DEVICES is 0 - -[2024-07-05 11:53:04,364][25826] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir -restart_behavior=resume -device=gpu -seed=200 -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=150000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-07-05 11:53:04,365][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 11:53:04,366][25826] Rollout worker 0 uses device cpu -[2024-07-05 11:53:04,366][25826] Rollout worker 1 uses device cpu -[2024-07-05 11:53:04,366][25826] Rollout worker 2 uses device cpu -[2024-07-05 11:53:04,366][25826] Rollout worker 3 uses device cpu -[2024-07-05 11:53:04,367][25826] Rollout worker 4 uses device cpu -[2024-07-05 11:53:04,367][25826] Rollout worker 5 uses device cpu -[2024-07-05 11:53:04,367][25826] Rollout worker 6 uses device cpu -[2024-07-05 11:53:04,367][25826] Rollout worker 7 uses device cpu -[2024-07-05 11:53:04,368][25826] Rollout worker 8 uses device cpu -[2024-07-05 11:53:04,368][25826] Rollout worker 9 uses device cpu -[2024-07-05 11:53:04,368][25826] Rollout worker 10 uses device cpu -[2024-07-05 11:53:04,368][25826] Rollout worker 11 uses device cpu -[2024-07-05 11:53:04,369][25826] Rollout worker 12 uses device cpu -[2024-07-05 11:53:04,369][25826] Rollout worker 13 uses device cpu -[2024-07-05 11:53:04,369][25826] Rollout worker 14 uses device cpu -[2024-07-05 11:53:04,369][25826] Rollout worker 15 uses device cpu -[2024-07-05 11:53:04,461][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:53:04,462][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 11:53:04,514][25826] Starting all processes... -[2024-07-05 11:53:04,515][25826] Starting process learner_proc0 -[2024-07-05 11:53:04,564][25826] Starting all processes... -[2024-07-05 11:53:04,567][25826] Starting process inference_proc0-0 -[2024-07-05 11:53:04,568][25826] Starting process rollout_proc0 -[2024-07-05 11:53:04,569][25826] Starting process rollout_proc1 -[2024-07-05 11:53:04,569][25826] Starting process rollout_proc2 -[2024-07-05 11:53:04,569][25826] Starting process rollout_proc3 -[2024-07-05 11:53:04,570][25826] Starting process rollout_proc4 -[2024-07-05 11:53:04,570][25826] Starting process rollout_proc5 -[2024-07-05 11:53:04,572][25826] Starting process rollout_proc6 -[2024-07-05 11:53:04,574][25826] Starting process rollout_proc7 -[2024-07-05 11:53:04,576][25826] Starting process rollout_proc8 -[2024-07-05 11:53:04,577][25826] Starting process rollout_proc9 -[2024-07-05 11:53:04,577][25826] Starting process rollout_proc10 -[2024-07-05 11:53:04,577][25826] Starting process rollout_proc11 -[2024-07-05 11:53:04,577][25826] Starting process rollout_proc12 -[2024-07-05 11:53:04,578][25826] Starting process rollout_proc13 -[2024-07-05 11:53:04,578][25826] Starting process rollout_proc14 -[2024-07-05 11:53:04,606][25826] Starting process rollout_proc15 -[2024-07-05 11:53:08,259][47614] Worker 4 uses CPU cores [4] -[2024-07-05 11:53:08,600][47636] Worker 13 uses CPU cores [13] -[2024-07-05 11:53:08,676][47610] Worker 0 uses CPU cores [0] -[2024-07-05 11:53:08,688][47637] Worker 10 uses CPU cores [10] -[2024-07-05 11:53:08,708][47639] Worker 12 uses CPU cores [12] -[2024-07-05 11:53:08,723][47609] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:53:08,724][47609] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 11:53:08,756][47617] Worker 8 uses CPU cores [8] -[2024-07-05 11:53:08,800][47615] Worker 5 uses CPU cores [5] -[2024-07-05 11:53:08,853][47609] Num visible devices: 1 -[2024-07-05 11:53:08,856][47611] Worker 3 uses CPU cores [3] -[2024-07-05 11:53:08,920][47612] Worker 1 uses CPU cores [1] -[2024-07-05 11:53:08,924][47638] Worker 15 uses CPU cores [15] -[2024-07-05 11:53:08,954][47618] Worker 6 uses CPU cores [6] -[2024-07-05 11:53:08,955][47616] Worker 7 uses CPU cores [7] -[2024-07-05 11:53:09,028][47634] Worker 9 uses CPU cores [9] -[2024-07-05 11:53:09,045][47589] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:53:09,045][47589] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 11:53:09,090][47589] Num visible devices: 1 -[2024-07-05 11:53:09,113][47589] Setting fixed seed 200 -[2024-07-05 11:53:09,116][47589] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:53:09,116][47589] Initializing actor-critic model on device cuda:0 -[2024-07-05 11:53:09,116][47589] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:53:09,118][47589] RunningMeanStd input shape: (1,) -[2024-07-05 11:53:09,120][47635] Worker 11 uses CPU cores [11] -[2024-07-05 11:53:09,125][47589] ConvEncoder: input_channels=3 -[2024-07-05 11:53:09,134][47613] Worker 2 uses CPU cores [2] -[2024-07-05 11:53:09,191][47589] Conv encoder output size: 512 -[2024-07-05 11:53:09,192][47589] Policy head output size: 512 -[2024-07-05 11:53:09,202][47589] Created Actor Critic model with architecture: -[2024-07-05 11:53:09,202][47589] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (6): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (7): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (10): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) + (11): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (12): ELU(alpha=1.0) + ) + (mlp_layers): Sequential( + (0): Linear(in_features=4608, out_features=512, bias=True) + (1): ELU(alpha=1.0) ) ) ) @@ -6121,1588 +129,968 @@ git_repo_name=not a git repository (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-07-05 11:53:09,209][47640] Worker 14 uses CPU cores [14] -[2024-07-05 11:53:09,324][47589] Using optimizer -[2024-07-05 11:53:09,966][47589] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014651_100016128.pth... -[2024-07-05 11:53:09,984][47589] Loading model from checkpoint -[2024-07-05 11:53:09,985][47589] Loaded experiment state at self.train_step=14651, self.env_steps=100016128 -[2024-07-05 11:53:09,985][47589] Initialized policy 0 weights for model version 14651 -[2024-07-05 11:53:09,986][47589] LearnerWorker_p0 finished initialization! -[2024-07-05 11:53:09,986][47589] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 11:53:10,054][47609] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 11:53:10,054][47609] RunningMeanStd input shape: (1,) -[2024-07-05 11:53:10,062][47609] ConvEncoder: input_channels=3 -[2024-07-05 11:53:10,117][47609] Conv encoder output size: 512 -[2024-07-05 11:53:10,117][47609] Policy head output size: 512 -[2024-07-05 11:53:10,151][25826] Inference worker 0-0 is ready! -[2024-07-05 11:53:10,151][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 11:53:10,201][47634] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,202][47611] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,204][47640] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,204][47639] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,204][47612] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,208][47636] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,208][47613] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,208][47615] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,208][47610] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,209][47618] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,215][47616] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,219][47638] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,220][47635] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,225][47614] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,234][47617] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,244][47637] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 11:53:10,547][47634] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,547][47640] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,548][47639] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,561][47616] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,564][47611] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,578][47610] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,587][47612] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,591][47613] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,720][47640] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,728][47639] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,745][47611] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,748][47617] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,768][47637] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,779][47618] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,795][47635] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,829][47616] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,914][47617] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,956][47618] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,957][47613] Decorrelating experience for 32 frames... -[2024-07-05 11:53:10,960][47639] Decorrelating experience for 64 frames... -[2024-07-05 11:53:10,965][47614] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,972][47636] Decorrelating experience for 0 frames... -[2024-07-05 11:53:10,997][47610] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,005][47640] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,134][47617] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,134][47638] Decorrelating experience for 0 frames... -[2024-07-05 11:53:11,148][47612] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,151][47614] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,179][47637] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,209][47635] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,263][47615] Decorrelating experience for 0 frames... -[2024-07-05 11:53:11,315][47612] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,316][47617] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,341][47638] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,347][47639] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,351][47614] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,395][47635] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,400][47640] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,400][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 100016128. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 11:53:11,403][47636] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,526][47637] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,533][47634] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,561][47638] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,565][47616] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,588][47615] Decorrelating experience for 32 frames... -[2024-07-05 11:53:11,592][47636] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,594][47614] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,704][47612] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,708][47637] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,743][47638] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,752][47613] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,760][47634] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,760][47616] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,802][47639] Decorrelating experience for 128 frames... -[2024-07-05 11:53:11,940][47611] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,944][47634] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,949][47618] Decorrelating experience for 64 frames... -[2024-07-05 11:53:11,954][47636] Decorrelating experience for 96 frames... -[2024-07-05 11:53:11,961][47640] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,009][47613] Decorrelating experience for 96 frames... -[2024-07-05 11:53:12,128][47615] Decorrelating experience for 64 frames... -[2024-07-05 11:53:12,169][47612] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,184][47611] Decorrelating experience for 96 frames... -[2024-07-05 11:53:12,184][47640] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,196][47610] Decorrelating experience for 64 frames... -[2024-07-05 11:53:12,210][47634] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,217][47616] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,221][47636] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,332][47635] Decorrelating experience for 96 frames... -[2024-07-05 11:53:12,381][47615] Decorrelating experience for 96 frames... -[2024-07-05 11:53:12,394][47613] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,411][47638] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,460][47612] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,460][47610] Decorrelating experience for 96 frames... -[2024-07-05 11:53:12,461][47617] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,478][47637] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,493][47634] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,562][47640] Decorrelating experience for 192 frames... -[2024-07-05 11:53:12,610][47618] Decorrelating experience for 96 frames... -[2024-07-05 11:53:12,650][47635] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,654][47615] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,683][47611] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,731][47612] Decorrelating experience for 192 frames... -[2024-07-05 11:53:12,733][47613] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,740][47639] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,799][47640] Decorrelating experience for 224 frames... -[2024-07-05 11:53:12,808][47617] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,861][47614] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,889][47610] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,899][47618] Decorrelating experience for 128 frames... -[2024-07-05 11:53:12,973][47611] Decorrelating experience for 160 frames... -[2024-07-05 11:53:12,990][47634] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,004][47616] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,042][47613] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,099][47617] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,114][47636] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,124][47639] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,188][47618] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,201][47637] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,210][47612] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,263][47614] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,299][47616] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,310][47638] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,374][47617] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,429][47636] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,429][47611] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,440][47639] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,541][47615] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,542][47614] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,542][47613] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,561][47616] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,675][47634] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,703][47618] Decorrelating experience for 192 frames... -[2024-07-05 11:53:13,766][47610] Decorrelating experience for 160 frames... -[2024-07-05 11:53:13,789][47636] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,845][47614] Decorrelating experience for 224 frames... -[2024-07-05 11:53:13,948][47611] Decorrelating experience for 224 frames... -[2024-07-05 11:53:14,003][47637] Decorrelating experience for 192 frames... -[2024-07-05 11:53:14,035][47610] Decorrelating experience for 192 frames... -[2024-07-05 11:53:14,035][47615] Decorrelating experience for 192 frames... -[2024-07-05 11:53:14,110][47618] Decorrelating experience for 224 frames... -[2024-07-05 11:53:14,277][47635] Decorrelating experience for 160 frames... -[2024-07-05 11:53:14,315][47610] Decorrelating experience for 224 frames... -[2024-07-05 11:53:14,320][47615] Decorrelating experience for 224 frames... -[2024-07-05 11:53:14,446][47589] Signal inference workers to stop experience collection... -[2024-07-05 11:53:14,479][47609] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 11:53:14,573][47635] Decorrelating experience for 192 frames... -[2024-07-05 11:53:14,573][47638] Decorrelating experience for 192 frames... -[2024-07-05 11:53:14,775][47637] Decorrelating experience for 224 frames... -[2024-07-05 11:53:14,790][47638] Decorrelating experience for 224 frames... -[2024-07-05 11:53:15,016][47635] Decorrelating experience for 224 frames... -[2024-07-05 11:53:16,282][47589] Signal inference workers to resume experience collection... -[2024-07-05 11:53:16,284][47609] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 11:53:16,406][25826] Fps is (10 sec: 1637.0, 60 sec: 1637.0, 300 sec: 1637.0). Total num frames: 100024320. Throughput: 0: 1042.3. Samples: 5216. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:53:16,420][25826] Avg episode reward: [(0, '1.115')] -[2024-07-05 11:53:19,257][47609] Updated weights for policy 0, policy_version 14661 (0.0032) -[2024-07-05 11:53:21,403][25826] Fps is (10 sec: 15562.3, 60 sec: 15562.3, 300 sec: 15562.3). Total num frames: 100171776. Throughput: 0: 2232.4. Samples: 22328. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:53:21,419][25826] Avg episode reward: [(0, '12.626')] -[2024-07-05 11:53:21,521][47609] Updated weights for policy 0, policy_version 14671 (0.0027) -[2024-07-05 11:53:23,794][47609] Updated weights for policy 0, policy_version 14681 (0.0030) -[2024-07-05 11:53:24,454][25826] Heartbeat connected on Batcher_0 -[2024-07-05 11:53:24,466][25826] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 11:53:24,467][25826] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 11:53:24,470][25826] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 11:53:24,473][25826] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 11:53:24,474][25826] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 11:53:24,480][25826] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 11:53:24,482][25826] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 11:53:24,483][25826] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 11:53:24,488][25826] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 11:53:24,489][25826] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 11:53:24,495][25826] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 11:53:24,499][25826] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 11:53:24,505][25826] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 11:53:24,508][25826] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 11:53:24,508][25826] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 11:53:24,512][25826] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 11:53:24,516][25826] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 11:53:24,521][25826] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 11:53:26,030][47609] Updated weights for policy 0, policy_version 14691 (0.0030) -[2024-07-05 11:53:26,401][25826] Fps is (10 sec: 32782.3, 60 sec: 22391.4, 300 sec: 22391.4). Total num frames: 100352000. Throughput: 0: 5166.1. Samples: 77492. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 11:53:26,416][25826] Avg episode reward: [(0, '51.701')] -[2024-07-05 11:53:26,523][47589] Saving new best policy, reward=51.701! -[2024-07-05 11:53:30,179][47609] Updated weights for policy 0, policy_version 14701 (0.0043) -[2024-07-05 11:53:31,403][25826] Fps is (10 sec: 28675.9, 60 sec: 22118.1, 300 sec: 22118.1). Total num frames: 100458496. Throughput: 0: 5645.7. Samples: 112916. Policy #0 lag: (min: 1.0, avg: 2.2, max: 3.0) -[2024-07-05 11:53:31,415][25826] Avg episode reward: [(0, '48.109')] -[2024-07-05 11:53:34,144][47609] Updated weights for policy 0, policy_version 14711 (0.0031) -[2024-07-05 11:53:36,404][25826] Fps is (10 sec: 16379.3, 60 sec: 19986.2, 300 sec: 19986.2). Total num frames: 100515840. Throughput: 0: 5087.3. Samples: 127196. Policy #0 lag: (min: 1.0, avg: 2.2, max: 3.0) -[2024-07-05 11:53:36,420][25826] Avg episode reward: [(0, '45.365')] -[2024-07-05 11:53:41,402][25826] Fps is (10 sec: 10649.6, 60 sec: 18295.3, 300 sec: 18295.3). Total num frames: 100564992. Throughput: 0: 4518.4. Samples: 135552. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0) -[2024-07-05 11:53:41,422][25826] Avg episode reward: [(0, '51.181')] -[2024-07-05 11:53:42,187][47609] Updated weights for policy 0, policy_version 14721 (0.0033) -[2024-07-05 11:53:46,403][25826] Fps is (10 sec: 13107.4, 60 sec: 18021.0, 300 sec: 18021.0). Total num frames: 100646912. Throughput: 0: 4639.8. Samples: 162404. Policy #0 lag: (min: 0.0, avg: 1.7, max: 4.0) -[2024-07-05 11:53:46,418][25826] Avg episode reward: [(0, '49.474')] -[2024-07-05 11:53:48,996][47609] Updated weights for policy 0, policy_version 14731 (0.0041) -[2024-07-05 11:53:50,665][47609] Updated weights for policy 0, policy_version 14741 (0.0007) -[2024-07-05 11:53:51,401][25826] Fps is (10 sec: 21299.3, 60 sec: 19046.3, 300 sec: 19046.3). Total num frames: 100777984. Throughput: 0: 4229.1. Samples: 169164. Policy #0 lag: (min: 1.0, avg: 2.0, max: 3.0) -[2024-07-05 11:53:51,406][25826] Avg episode reward: [(0, '47.052')] -[2024-07-05 11:53:52,594][47609] Updated weights for policy 0, policy_version 14751 (0.0016) -[2024-07-05 11:53:54,440][47609] Updated weights for policy 0, policy_version 14761 (0.0012) -[2024-07-05 11:53:56,242][47609] Updated weights for policy 0, policy_version 14771 (0.0008) -[2024-07-05 11:53:56,400][25826] Fps is (10 sec: 35235.1, 60 sec: 21845.3, 300 sec: 21845.3). Total num frames: 100999168. Throughput: 0: 5239.6. Samples: 235780. Policy #0 lag: (min: 1.0, avg: 2.0, max: 3.0) -[2024-07-05 11:53:56,405][25826] Avg episode reward: [(0, '44.255')] -[2024-07-05 11:53:57,996][47609] Updated weights for policy 0, policy_version 14781 (0.0012) -[2024-07-05 11:53:59,716][47609] Updated weights for policy 0, policy_version 14791 (0.0008) -[2024-07-05 11:54:01,401][25826] Fps is (10 sec: 45057.2, 60 sec: 24248.3, 300 sec: 24248.3). Total num frames: 101228544. Throughput: 0: 6662.6. Samples: 305004. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:54:01,411][25826] Avg episode reward: [(0, '48.570')] -[2024-07-05 11:54:01,568][47609] Updated weights for policy 0, policy_version 14801 (0.0014) -[2024-07-05 11:54:03,404][47609] Updated weights for policy 0, policy_version 14811 (0.0013) -[2024-07-05 11:54:05,129][47609] Updated weights for policy 0, policy_version 14821 (0.0008) -[2024-07-05 11:54:06,402][25826] Fps is (10 sec: 46693.8, 60 sec: 26363.3, 300 sec: 26363.3). Total num frames: 101466112. Throughput: 0: 7041.6. Samples: 339188. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:54:06,413][25826] Avg episode reward: [(0, '47.868')] -[2024-07-05 11:54:06,968][47609] Updated weights for policy 0, policy_version 14831 (0.0008) -[2024-07-05 11:54:08,898][47609] Updated weights for policy 0, policy_version 14841 (0.0022) -[2024-07-05 11:54:10,731][47609] Updated weights for policy 0, policy_version 14851 (0.0008) -[2024-07-05 11:54:11,401][25826] Fps is (10 sec: 45055.5, 60 sec: 27716.2, 300 sec: 27716.2). Total num frames: 101679104. Throughput: 0: 7298.7. Samples: 405932. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:54:11,405][25826] Avg episode reward: [(0, '50.479')] -[2024-07-05 11:54:12,517][47609] Updated weights for policy 0, policy_version 14861 (0.0007) -[2024-07-05 11:54:14,189][47609] Updated weights for policy 0, policy_version 14871 (0.0010) -[2024-07-05 11:54:15,884][47609] Updated weights for policy 0, policy_version 14881 (0.0008) -[2024-07-05 11:54:16,400][25826] Fps is (10 sec: 45056.8, 60 sec: 31541.5, 300 sec: 29239.1). Total num frames: 101916672. Throughput: 0: 8091.9. Samples: 477048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:54:16,402][25826] Avg episode reward: [(0, '47.000')] -[2024-07-05 11:54:17,652][47609] Updated weights for policy 0, policy_version 14891 (0.0007) -[2024-07-05 11:54:19,394][47609] Updated weights for policy 0, policy_version 14901 (0.0010) -[2024-07-05 11:54:21,135][47609] Updated weights for policy 0, policy_version 14911 (0.0008) -[2024-07-05 11:54:21,401][25826] Fps is (10 sec: 47513.3, 60 sec: 33041.9, 300 sec: 30544.4). Total num frames: 102154240. Throughput: 0: 8556.8. Samples: 512228. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:54:21,403][25826] Avg episode reward: [(0, '47.700')] -[2024-07-05 11:54:22,904][47609] Updated weights for policy 0, policy_version 14921 (0.0008) -[2024-07-05 11:54:24,661][47609] Updated weights for policy 0, policy_version 14931 (0.0010) -[2024-07-05 11:54:26,360][47609] Updated weights for policy 0, policy_version 14941 (0.0008) -[2024-07-05 11:54:26,400][25826] Fps is (10 sec: 47513.4, 60 sec: 33996.8, 300 sec: 31675.7). Total num frames: 102391808. Throughput: 0: 9937.7. Samples: 582748. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:54:26,402][25826] Avg episode reward: [(0, '48.906')] -[2024-07-05 11:54:28,006][47609] Updated weights for policy 0, policy_version 14951 (0.0008) -[2024-07-05 11:54:29,755][47609] Updated weights for policy 0, policy_version 14961 (0.0008) -[2024-07-05 11:54:31,401][25826] Fps is (10 sec: 46694.6, 60 sec: 36044.9, 300 sec: 32563.2). Total num frames: 102621184. Throughput: 0: 10908.5. Samples: 653256. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:54:31,412][25826] Avg episode reward: [(0, '44.487')] -[2024-07-05 11:54:31,561][47609] Updated weights for policy 0, policy_version 14971 (0.0009) -[2024-07-05 11:54:33,255][47609] Updated weights for policy 0, policy_version 14981 (0.0010) -[2024-07-05 11:54:35,008][47609] Updated weights for policy 0, policy_version 14991 (0.0008) -[2024-07-05 11:54:36,400][25826] Fps is (10 sec: 47513.6, 60 sec: 39186.9, 300 sec: 33539.0). Total num frames: 102866944. Throughput: 0: 11550.0. Samples: 688912. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:54:36,402][25826] Avg episode reward: [(0, '48.376')] -[2024-07-05 11:54:36,752][47609] Updated weights for policy 0, policy_version 15001 (0.0008) -[2024-07-05 11:54:38,594][47609] Updated weights for policy 0, policy_version 15011 (0.0014) -[2024-07-05 11:54:40,310][47609] Updated weights for policy 0, policy_version 15021 (0.0010) -[2024-07-05 11:54:41,401][25826] Fps is (10 sec: 47513.8, 60 sec: 42189.0, 300 sec: 34224.3). Total num frames: 103096320. Throughput: 0: 11625.4. Samples: 758924. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:54:41,414][25826] Avg episode reward: [(0, '46.887')] -[2024-07-05 11:54:42,061][47609] Updated weights for policy 0, policy_version 15031 (0.0008) -[2024-07-05 11:54:43,799][47609] Updated weights for policy 0, policy_version 15041 (0.0010) -[2024-07-05 11:54:45,429][47609] Updated weights for policy 0, policy_version 15051 (0.0008) -[2024-07-05 11:54:46,400][25826] Fps is (10 sec: 46694.3, 60 sec: 44784.9, 300 sec: 34923.8). Total num frames: 103333888. Throughput: 0: 11680.3. Samples: 830620. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:54:46,402][25826] Avg episode reward: [(0, '45.050')] -[2024-07-05 11:54:47,052][47609] Updated weights for policy 0, policy_version 15061 (0.0008) -[2024-07-05 11:54:48,874][47609] Updated weights for policy 0, policy_version 15071 (0.0011) -[2024-07-05 11:54:50,674][47609] Updated weights for policy 0, policy_version 15081 (0.0008) -[2024-07-05 11:54:51,406][25826] Fps is (10 sec: 47513.9, 60 sec: 46558.1, 300 sec: 35553.3). Total num frames: 103571456. Throughput: 0: 11705.7. Samples: 865944. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:54:51,407][25826] Avg episode reward: [(0, '45.321')] -[2024-07-05 11:54:52,364][47609] Updated weights for policy 0, policy_version 15091 (0.0008) -[2024-07-05 11:54:54,099][47609] Updated weights for policy 0, policy_version 15101 (0.0008) -[2024-07-05 11:54:55,777][47609] Updated weights for policy 0, policy_version 15111 (0.0008) -[2024-07-05 11:54:56,401][25826] Fps is (10 sec: 47513.5, 60 sec: 46830.9, 300 sec: 36122.8). Total num frames: 103809024. Throughput: 0: 11803.0. Samples: 937068. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:54:56,402][25826] Avg episode reward: [(0, '44.170')] -[2024-07-05 11:54:57,495][47609] Updated weights for policy 0, policy_version 15121 (0.0008) -[2024-07-05 11:54:59,205][47609] Updated weights for policy 0, policy_version 15131 (0.0008) -[2024-07-05 11:55:00,935][47609] Updated weights for policy 0, policy_version 15141 (0.0008) -[2024-07-05 11:55:01,400][25826] Fps is (10 sec: 47513.8, 60 sec: 46967.5, 300 sec: 36640.6). Total num frames: 104046592. Throughput: 0: 11821.0. Samples: 1008992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:55:01,401][25826] Avg episode reward: [(0, '48.685')] -[2024-07-05 11:55:01,422][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000015144_104054784.pth... -[2024-07-05 11:55:01,504][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014240_96649216.pth -[2024-07-05 11:55:02,673][47609] Updated weights for policy 0, policy_version 15151 (0.0009) -[2024-07-05 11:55:04,361][47609] Updated weights for policy 0, policy_version 15161 (0.0007) -[2024-07-05 11:55:06,044][47609] Updated weights for policy 0, policy_version 15171 (0.0007) -[2024-07-05 11:55:06,401][25826] Fps is (10 sec: 48332.6, 60 sec: 47104.0, 300 sec: 37184.5). Total num frames: 104292352. Throughput: 0: 11832.3. Samples: 1044680. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:55:06,402][25826] Avg episode reward: [(0, '45.571')] -[2024-07-05 11:55:07,784][47609] Updated weights for policy 0, policy_version 15181 (0.0008) -[2024-07-05 11:55:09,461][47609] Updated weights for policy 0, policy_version 15191 (0.0008) -[2024-07-05 11:55:11,176][47609] Updated weights for policy 0, policy_version 15201 (0.0008) -[2024-07-05 11:55:11,400][25826] Fps is (10 sec: 47513.5, 60 sec: 47377.2, 300 sec: 37546.7). Total num frames: 104521728. Throughput: 0: 11868.0. Samples: 1116808. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:55:11,401][25826] Avg episode reward: [(0, '48.985')] -[2024-07-05 11:55:12,974][47609] Updated weights for policy 0, policy_version 15211 (0.0008) -[2024-07-05 11:55:14,677][47609] Updated weights for policy 0, policy_version 15221 (0.0008) -[2024-07-05 11:55:16,313][47609] Updated weights for policy 0, policy_version 15231 (0.0008) -[2024-07-05 11:55:16,400][25826] Fps is (10 sec: 47513.8, 60 sec: 47513.5, 300 sec: 38010.9). Total num frames: 104767488. Throughput: 0: 11894.1. Samples: 1188492. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:55:16,401][25826] Avg episode reward: [(0, '46.313')] -[2024-07-05 11:55:18,017][47609] Updated weights for policy 0, policy_version 15241 (0.0008) -[2024-07-05 11:55:19,718][47609] Updated weights for policy 0, policy_version 15251 (0.0008) -[2024-07-05 11:55:21,400][25826] Fps is (10 sec: 48332.7, 60 sec: 47513.7, 300 sec: 38376.4). Total num frames: 105005056. Throughput: 0: 11908.2. Samples: 1224780. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:55:21,401][25826] Avg episode reward: [(0, '44.973')] -[2024-07-05 11:55:21,416][47609] Updated weights for policy 0, policy_version 15261 (0.0007) -[2024-07-05 11:55:23,108][47609] Updated weights for policy 0, policy_version 15271 (0.0010) -[2024-07-05 11:55:24,800][47609] Updated weights for policy 0, policy_version 15281 (0.0009) -[2024-07-05 11:55:26,400][25826] Fps is (10 sec: 48332.8, 60 sec: 47650.1, 300 sec: 38775.5). Total num frames: 105250816. Throughput: 0: 11959.5. Samples: 1297100. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:55:26,409][25826] Avg episode reward: [(0, '49.618')] -[2024-07-05 11:55:26,449][47609] Updated weights for policy 0, policy_version 15291 (0.0008) -[2024-07-05 11:55:28,176][47609] Updated weights for policy 0, policy_version 15301 (0.0008) -[2024-07-05 11:55:29,880][47609] Updated weights for policy 0, policy_version 15311 (0.0008) -[2024-07-05 11:55:31,400][25826] Fps is (10 sec: 49151.6, 60 sec: 47923.2, 300 sec: 39146.0). Total num frames: 105496576. Throughput: 0: 11989.2. Samples: 1370136. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:55:31,401][25826] Avg episode reward: [(0, '47.196')] -[2024-07-05 11:55:31,552][47609] Updated weights for policy 0, policy_version 15321 (0.0008) -[2024-07-05 11:55:33,271][47609] Updated weights for policy 0, policy_version 15331 (0.0008) -[2024-07-05 11:55:35,033][47609] Updated weights for policy 0, policy_version 15341 (0.0008) -[2024-07-05 11:55:36,400][25826] Fps is (10 sec: 47514.0, 60 sec: 47650.2, 300 sec: 39378.1). Total num frames: 105725952. Throughput: 0: 11992.4. Samples: 1405600. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 11:55:36,401][25826] Avg episode reward: [(0, '50.593')] -[2024-07-05 11:55:36,770][47609] Updated weights for policy 0, policy_version 15351 (0.0009) -[2024-07-05 11:55:38,597][47609] Updated weights for policy 0, policy_version 15361 (0.0008) -[2024-07-05 11:55:40,330][47609] Updated weights for policy 0, policy_version 15371 (0.0007) -[2024-07-05 11:55:41,401][25826] Fps is (10 sec: 46694.5, 60 sec: 47786.7, 300 sec: 39649.3). Total num frames: 105963520. Throughput: 0: 11961.1. Samples: 1475316. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:55:41,403][25826] Avg episode reward: [(0, '48.493')] -[2024-07-05 11:55:42,077][47609] Updated weights for policy 0, policy_version 15381 (0.0009) -[2024-07-05 11:55:43,797][47609] Updated weights for policy 0, policy_version 15391 (0.0010) -[2024-07-05 11:55:45,541][47609] Updated weights for policy 0, policy_version 15401 (0.0007) -[2024-07-05 11:55:46,400][25826] Fps is (10 sec: 47512.0, 60 sec: 47786.4, 300 sec: 39902.9). Total num frames: 106201088. Throughput: 0: 11940.3. Samples: 1546312. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:55:46,402][25826] Avg episode reward: [(0, '49.024')] -[2024-07-05 11:55:47,227][47609] Updated weights for policy 0, policy_version 15411 (0.0008) -[2024-07-05 11:55:48,959][47609] Updated weights for policy 0, policy_version 15421 (0.0010) -[2024-07-05 11:55:50,687][47609] Updated weights for policy 0, policy_version 15431 (0.0008) -[2024-07-05 11:55:51,400][25826] Fps is (10 sec: 47512.5, 60 sec: 47786.4, 300 sec: 40140.7). Total num frames: 106438656. Throughput: 0: 11934.2. Samples: 1581720. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:55:51,401][25826] Avg episode reward: [(0, '47.873')] -[2024-07-05 11:55:52,361][47609] Updated weights for policy 0, policy_version 15441 (0.0009) -[2024-07-05 11:55:54,070][47609] Updated weights for policy 0, policy_version 15451 (0.0007) -[2024-07-05 11:55:55,764][47609] Updated weights for policy 0, policy_version 15461 (0.0010) -[2024-07-05 11:55:56,400][25826] Fps is (10 sec: 47515.4, 60 sec: 47786.8, 300 sec: 40364.2). Total num frames: 106676224. Throughput: 0: 11942.6. Samples: 1654224. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 11:55:56,401][25826] Avg episode reward: [(0, '49.681')] -[2024-07-05 11:55:57,462][47609] Updated weights for policy 0, policy_version 15471 (0.0008) -[2024-07-05 11:55:59,226][47609] Updated weights for policy 0, policy_version 15481 (0.0009) -[2024-07-05 11:56:00,917][47609] Updated weights for policy 0, policy_version 15491 (0.0008) -[2024-07-05 11:56:01,400][25826] Fps is (10 sec: 47515.5, 60 sec: 47786.7, 300 sec: 40574.5). Total num frames: 106913792. Throughput: 0: 11943.1. Samples: 1725928. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:56:01,401][25826] Avg episode reward: [(0, '48.940')] -[2024-07-05 11:56:02,657][47609] Updated weights for policy 0, policy_version 15501 (0.0009) -[2024-07-05 11:56:04,389][47609] Updated weights for policy 0, policy_version 15511 (0.0008) -[2024-07-05 11:56:06,100][47609] Updated weights for policy 0, policy_version 15521 (0.0008) -[2024-07-05 11:56:06,400][25826] Fps is (10 sec: 47513.2, 60 sec: 47650.2, 300 sec: 40772.8). Total num frames: 107151360. Throughput: 0: 11924.6. Samples: 1761388. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:56:06,401][25826] Avg episode reward: [(0, '47.512')] -[2024-07-05 11:56:07,794][47609] Updated weights for policy 0, policy_version 15531 (0.0007) -[2024-07-05 11:56:09,470][47609] Updated weights for policy 0, policy_version 15541 (0.0013) -[2024-07-05 11:56:11,158][47609] Updated weights for policy 0, policy_version 15551 (0.0008) -[2024-07-05 11:56:11,400][25826] Fps is (10 sec: 48331.4, 60 sec: 47923.0, 300 sec: 41005.5). Total num frames: 107397120. Throughput: 0: 11924.5. Samples: 1833704. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:56:11,402][25826] Avg episode reward: [(0, '47.037')] -[2024-07-05 11:56:12,835][47609] Updated weights for policy 0, policy_version 15561 (0.0011) -[2024-07-05 11:56:14,515][47609] Updated weights for policy 0, policy_version 15571 (0.0008) -[2024-07-05 11:56:16,199][47609] Updated weights for policy 0, policy_version 15581 (0.0008) -[2024-07-05 11:56:16,400][25826] Fps is (10 sec: 49152.1, 60 sec: 47923.2, 300 sec: 41225.7). Total num frames: 107642880. Throughput: 0: 11921.2. Samples: 1906588. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:56:16,401][25826] Avg episode reward: [(0, '48.869')] -[2024-07-05 11:56:17,911][47609] Updated weights for policy 0, policy_version 15591 (0.0011) -[2024-07-05 11:56:19,571][47609] Updated weights for policy 0, policy_version 15601 (0.0011) -[2024-07-05 11:56:21,272][47609] Updated weights for policy 0, policy_version 15611 (0.0008) -[2024-07-05 11:56:21,400][25826] Fps is (10 sec: 48333.4, 60 sec: 47923.1, 300 sec: 41391.1). Total num frames: 107880448. Throughput: 0: 11938.0. Samples: 1942812. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 11:56:21,401][25826] Avg episode reward: [(0, '48.260')] -[2024-07-05 11:56:22,978][47609] Updated weights for policy 0, policy_version 15621 (0.0008) -[2024-07-05 11:56:24,694][47609] Updated weights for policy 0, policy_version 15631 (0.0008) -[2024-07-05 11:56:26,379][47609] Updated weights for policy 0, policy_version 15641 (0.0008) -[2024-07-05 11:56:26,400][25826] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 41590.2). Total num frames: 108126208. Throughput: 0: 12004.1. Samples: 2015500. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:56:26,401][25826] Avg episode reward: [(0, '49.126')] -[2024-07-05 11:56:28,030][47609] Updated weights for policy 0, policy_version 15651 (0.0008) -[2024-07-05 11:56:29,729][47609] Updated weights for policy 0, policy_version 15661 (0.0011) -[2024-07-05 11:56:31,400][25826] Fps is (10 sec: 48332.4, 60 sec: 47786.6, 300 sec: 41738.2). Total num frames: 108363776. Throughput: 0: 12039.1. Samples: 2088068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:56:31,401][25826] Avg episode reward: [(0, '47.778')] -[2024-07-05 11:56:31,427][47609] Updated weights for policy 0, policy_version 15671 (0.0008) -[2024-07-05 11:56:33,174][47609] Updated weights for policy 0, policy_version 15681 (0.0007) -[2024-07-05 11:56:34,874][47609] Updated weights for policy 0, policy_version 15691 (0.0008) -[2024-07-05 11:56:36,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 41919.1). Total num frames: 108609536. Throughput: 0: 12058.7. Samples: 2124360. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:56:36,401][25826] Avg episode reward: [(0, '48.775')] -[2024-07-05 11:56:36,588][47609] Updated weights for policy 0, policy_version 15701 (0.0007) -[2024-07-05 11:56:38,210][47609] Updated weights for policy 0, policy_version 15711 (0.0008) -[2024-07-05 11:56:39,901][47609] Updated weights for policy 0, policy_version 15721 (0.0008) -[2024-07-05 11:56:41,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48059.7, 300 sec: 42052.3). Total num frames: 108847104. Throughput: 0: 12054.3. Samples: 2196668. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:56:41,401][25826] Avg episode reward: [(0, '51.225')] -[2024-07-05 11:56:41,609][47609] Updated weights for policy 0, policy_version 15731 (0.0007) -[2024-07-05 11:56:43,273][47609] Updated weights for policy 0, policy_version 15741 (0.0008) -[2024-07-05 11:56:44,966][47609] Updated weights for policy 0, policy_version 15751 (0.0008) -[2024-07-05 11:56:46,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.5, 300 sec: 42217.4). Total num frames: 109092864. Throughput: 0: 12081.6. Samples: 2269600. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:56:46,401][25826] Avg episode reward: [(0, '49.588')] -[2024-07-05 11:56:46,692][47609] Updated weights for policy 0, policy_version 15761 (0.0010) -[2024-07-05 11:56:48,369][47609] Updated weights for policy 0, policy_version 15771 (0.0010) -[2024-07-05 11:56:50,092][47609] Updated weights for policy 0, policy_version 15781 (0.0008) -[2024-07-05 11:56:51,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.4, 300 sec: 42337.7). Total num frames: 109330432. Throughput: 0: 12100.4. Samples: 2305908. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:56:51,401][25826] Avg episode reward: [(0, '48.260')] -[2024-07-05 11:56:51,742][47609] Updated weights for policy 0, policy_version 15791 (0.0008) -[2024-07-05 11:56:53,426][47609] Updated weights for policy 0, policy_version 15801 (0.0007) -[2024-07-05 11:56:55,092][47609] Updated weights for policy 0, policy_version 15811 (0.0008) -[2024-07-05 11:56:56,400][25826] Fps is (10 sec: 48331.9, 60 sec: 48332.6, 300 sec: 42489.1). Total num frames: 109576192. Throughput: 0: 12113.7. Samples: 2378820. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:56:56,401][25826] Avg episode reward: [(0, '48.332')] -[2024-07-05 11:56:56,800][47609] Updated weights for policy 0, policy_version 15821 (0.0008) -[2024-07-05 11:56:58,512][47609] Updated weights for policy 0, policy_version 15831 (0.0007) -[2024-07-05 11:57:00,212][47609] Updated weights for policy 0, policy_version 15841 (0.0009) -[2024-07-05 11:57:01,400][25826] Fps is (10 sec: 49151.5, 60 sec: 48469.1, 300 sec: 42634.0). Total num frames: 109821952. Throughput: 0: 12107.2. Samples: 2451416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:57:01,401][25826] Avg episode reward: [(0, '48.866')] -[2024-07-05 11:57:01,413][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000015848_109821952.pth... -[2024-07-05 11:57:01,498][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000014651_100016128.pth -[2024-07-05 11:57:01,922][47609] Updated weights for policy 0, policy_version 15851 (0.0008) -[2024-07-05 11:57:03,598][47609] Updated weights for policy 0, policy_version 15861 (0.0007) -[2024-07-05 11:57:05,294][47609] Updated weights for policy 0, policy_version 15871 (0.0008) -[2024-07-05 11:57:06,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 42737.8). Total num frames: 110059520. Throughput: 0: 12109.0. Samples: 2487716. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:57:06,401][25826] Avg episode reward: [(0, '50.955')] -[2024-07-05 11:57:06,974][47609] Updated weights for policy 0, policy_version 15881 (0.0008) -[2024-07-05 11:57:08,637][47609] Updated weights for policy 0, policy_version 15891 (0.0010) -[2024-07-05 11:57:10,294][47609] Updated weights for policy 0, policy_version 15901 (0.0008) -[2024-07-05 11:57:11,400][25826] Fps is (10 sec: 48333.7, 60 sec: 48469.5, 300 sec: 42871.5). Total num frames: 110305280. Throughput: 0: 12112.0. Samples: 2560540. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:57:11,401][25826] Avg episode reward: [(0, '48.431')] -[2024-07-05 11:57:12,020][47609] Updated weights for policy 0, policy_version 15911 (0.0008) -[2024-07-05 11:57:13,718][47609] Updated weights for policy 0, policy_version 15921 (0.0008) -[2024-07-05 11:57:15,430][47609] Updated weights for policy 0, policy_version 15931 (0.0008) -[2024-07-05 11:57:16,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48332.7, 300 sec: 42966.2). Total num frames: 110542848. Throughput: 0: 12103.6. Samples: 2632732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:57:16,402][25826] Avg episode reward: [(0, '47.300')] -[2024-07-05 11:57:17,149][47609] Updated weights for policy 0, policy_version 15941 (0.0008) -[2024-07-05 11:57:18,836][47609] Updated weights for policy 0, policy_version 15951 (0.0008) -[2024-07-05 11:57:20,528][47609] Updated weights for policy 0, policy_version 15961 (0.0008) -[2024-07-05 11:57:21,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 43089.9). Total num frames: 110788608. Throughput: 0: 12105.1. Samples: 2669088. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:57:21,401][25826] Avg episode reward: [(0, '50.048')] -[2024-07-05 11:57:22,162][47609] Updated weights for policy 0, policy_version 15971 (0.0007) -[2024-07-05 11:57:23,861][47609] Updated weights for policy 0, policy_version 15981 (0.0010) -[2024-07-05 11:57:25,550][47609] Updated weights for policy 0, policy_version 15991 (0.0009) -[2024-07-05 11:57:26,400][25826] Fps is (10 sec: 49152.0, 60 sec: 48469.2, 300 sec: 43208.8). Total num frames: 111034368. Throughput: 0: 12124.6. Samples: 2742276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:57:26,401][25826] Avg episode reward: [(0, '41.601')] -[2024-07-05 11:57:27,222][47609] Updated weights for policy 0, policy_version 16001 (0.0010) -[2024-07-05 11:57:28,940][47609] Updated weights for policy 0, policy_version 16011 (0.0010) -[2024-07-05 11:57:30,630][47609] Updated weights for policy 0, policy_version 16021 (0.0008) -[2024-07-05 11:57:31,400][25826] Fps is (10 sec: 48331.2, 60 sec: 48469.2, 300 sec: 43291.5). Total num frames: 111271936. Throughput: 0: 12113.1. Samples: 2814692. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:57:31,401][25826] Avg episode reward: [(0, '47.831')] -[2024-07-05 11:57:32,286][47609] Updated weights for policy 0, policy_version 16031 (0.0011) -[2024-07-05 11:57:34,001][47609] Updated weights for policy 0, policy_version 16041 (0.0008) -[2024-07-05 11:57:35,723][47609] Updated weights for policy 0, policy_version 16051 (0.0007) -[2024-07-05 11:57:36,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48469.3, 300 sec: 43402.1). Total num frames: 111517696. Throughput: 0: 12117.3. Samples: 2851184. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:57:36,401][25826] Avg episode reward: [(0, '48.059')] -[2024-07-05 11:57:37,391][47609] Updated weights for policy 0, policy_version 16061 (0.0008) -[2024-07-05 11:57:39,071][47609] Updated weights for policy 0, policy_version 16071 (0.0008) -[2024-07-05 11:57:40,759][47609] Updated weights for policy 0, policy_version 16081 (0.0008) -[2024-07-05 11:57:41,400][25826] Fps is (10 sec: 48333.7, 60 sec: 48469.3, 300 sec: 43478.3). Total num frames: 111755264. Throughput: 0: 12113.5. Samples: 2923928. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:57:41,401][25826] Avg episode reward: [(0, '48.349')] -[2024-07-05 11:57:42,432][47609] Updated weights for policy 0, policy_version 16091 (0.0008) -[2024-07-05 11:57:44,131][47609] Updated weights for policy 0, policy_version 16101 (0.0008) -[2024-07-05 11:57:45,823][47609] Updated weights for policy 0, policy_version 16111 (0.0008) -[2024-07-05 11:57:46,400][25826] Fps is (10 sec: 48332.0, 60 sec: 48469.2, 300 sec: 43581.4). Total num frames: 112001024. Throughput: 0: 12105.0. Samples: 2996140. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:57:46,401][25826] Avg episode reward: [(0, '46.739')] -[2024-07-05 11:57:47,543][47609] Updated weights for policy 0, policy_version 16121 (0.0008) -[2024-07-05 11:57:49,281][47609] Updated weights for policy 0, policy_version 16131 (0.0008) -[2024-07-05 11:57:50,989][47609] Updated weights for policy 0, policy_version 16141 (0.0007) -[2024-07-05 11:57:51,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48469.2, 300 sec: 43651.6). Total num frames: 112238592. Throughput: 0: 12103.7. Samples: 3032384. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:57:51,401][25826] Avg episode reward: [(0, '47.476')] -[2024-07-05 11:57:52,674][47609] Updated weights for policy 0, policy_version 16151 (0.0010) -[2024-07-05 11:57:54,341][47609] Updated weights for policy 0, policy_version 16161 (0.0010) -[2024-07-05 11:57:56,043][47609] Updated weights for policy 0, policy_version 16171 (0.0008) -[2024-07-05 11:57:56,400][25826] Fps is (10 sec: 48333.4, 60 sec: 48469.4, 300 sec: 43748.1). Total num frames: 112484352. Throughput: 0: 12098.9. Samples: 3104992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:57:56,401][25826] Avg episode reward: [(0, '46.873')] -[2024-07-05 11:57:57,706][47609] Updated weights for policy 0, policy_version 16181 (0.0007) -[2024-07-05 11:57:59,401][47609] Updated weights for policy 0, policy_version 16191 (0.0011) -[2024-07-05 11:58:01,072][47609] Updated weights for policy 0, policy_version 16201 (0.0010) -[2024-07-05 11:58:01,400][25826] Fps is (10 sec: 48333.4, 60 sec: 48332.9, 300 sec: 43813.1). Total num frames: 112721920. Throughput: 0: 12109.4. Samples: 3177652. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 11:58:01,401][25826] Avg episode reward: [(0, '47.640')] -[2024-07-05 11:58:02,785][47609] Updated weights for policy 0, policy_version 16211 (0.0010) -[2024-07-05 11:58:04,467][47609] Updated weights for policy 0, policy_version 16221 (0.0008) -[2024-07-05 11:58:06,207][47609] Updated weights for policy 0, policy_version 16231 (0.0008) -[2024-07-05 11:58:06,401][25826] Fps is (10 sec: 48329.2, 60 sec: 48468.8, 300 sec: 43903.4). Total num frames: 112967680. Throughput: 0: 12105.0. Samples: 3213824. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:06,402][25826] Avg episode reward: [(0, '47.436')] -[2024-07-05 11:58:07,885][47609] Updated weights for policy 0, policy_version 16241 (0.0007) -[2024-07-05 11:58:09,592][47609] Updated weights for policy 0, policy_version 16251 (0.0008) -[2024-07-05 11:58:11,279][47609] Updated weights for policy 0, policy_version 16261 (0.0008) -[2024-07-05 11:58:11,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 44681.8). Total num frames: 113205248. Throughput: 0: 12087.5. Samples: 3286212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:11,401][25826] Avg episode reward: [(0, '45.413')] -[2024-07-05 11:58:12,962][47609] Updated weights for policy 0, policy_version 16271 (0.0008) -[2024-07-05 11:58:14,702][47609] Updated weights for policy 0, policy_version 16281 (0.0008) -[2024-07-05 11:58:16,365][47609] Updated weights for policy 0, policy_version 16291 (0.0008) -[2024-07-05 11:58:16,400][25826] Fps is (10 sec: 48336.6, 60 sec: 48469.4, 300 sec: 45014.6). Total num frames: 113451008. Throughput: 0: 12091.8. Samples: 3358820. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:16,401][25826] Avg episode reward: [(0, '49.892')] -[2024-07-05 11:58:18,027][47609] Updated weights for policy 0, policy_version 16301 (0.0010) -[2024-07-05 11:58:19,723][47609] Updated weights for policy 0, policy_version 16311 (0.0007) -[2024-07-05 11:58:21,400][25826] Fps is (10 sec: 48331.9, 60 sec: 48332.6, 300 sec: 45208.7). Total num frames: 113688576. Throughput: 0: 12100.3. Samples: 3395700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:21,401][25826] Avg episode reward: [(0, '46.368')] -[2024-07-05 11:58:21,406][47609] Updated weights for policy 0, policy_version 16321 (0.0008) -[2024-07-05 11:58:23,107][47609] Updated weights for policy 0, policy_version 16331 (0.0007) -[2024-07-05 11:58:24,814][47609] Updated weights for policy 0, policy_version 16341 (0.0007) -[2024-07-05 11:58:26,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 45680.9). Total num frames: 113934336. Throughput: 0: 12091.6. Samples: 3468048. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:26,401][25826] Avg episode reward: [(0, '47.109')] -[2024-07-05 11:58:26,497][47609] Updated weights for policy 0, policy_version 16351 (0.0008) -[2024-07-05 11:58:28,211][47609] Updated weights for policy 0, policy_version 16361 (0.0008) -[2024-07-05 11:58:29,941][47609] Updated weights for policy 0, policy_version 16371 (0.0009) -[2024-07-05 11:58:31,400][25826] Fps is (10 sec: 48334.0, 60 sec: 48333.1, 300 sec: 46292.2). Total num frames: 114171904. Throughput: 0: 12095.8. Samples: 3540448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:31,401][25826] Avg episode reward: [(0, '48.730')] -[2024-07-05 11:58:31,585][47609] Updated weights for policy 0, policy_version 16381 (0.0007) -[2024-07-05 11:58:33,245][47609] Updated weights for policy 0, policy_version 16391 (0.0008) -[2024-07-05 11:58:34,935][47609] Updated weights for policy 0, policy_version 16401 (0.0011) -[2024-07-05 11:58:36,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 46958.3). Total num frames: 114417664. Throughput: 0: 12098.3. Samples: 3576808. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:36,404][25826] Avg episode reward: [(0, '46.547')] -[2024-07-05 11:58:36,648][47609] Updated weights for policy 0, policy_version 16411 (0.0010) -[2024-07-05 11:58:38,338][47609] Updated weights for policy 0, policy_version 16421 (0.0010) -[2024-07-05 11:58:40,052][47609] Updated weights for policy 0, policy_version 16431 (0.0009) -[2024-07-05 11:58:41,400][25826] Fps is (10 sec: 49151.2, 60 sec: 48469.3, 300 sec: 47514.0). Total num frames: 114663424. Throughput: 0: 12103.6. Samples: 3649656. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:58:41,401][25826] Avg episode reward: [(0, '46.865')] -[2024-07-05 11:58:41,738][47609] Updated weights for policy 0, policy_version 16441 (0.0008) -[2024-07-05 11:58:43,451][47609] Updated weights for policy 0, policy_version 16451 (0.0008) -[2024-07-05 11:58:45,125][47609] Updated weights for policy 0, policy_version 16461 (0.0014) -[2024-07-05 11:58:46,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48333.0, 300 sec: 47874.6). Total num frames: 114900992. Throughput: 0: 12097.9. Samples: 3722056. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:58:46,401][25826] Avg episode reward: [(0, '51.253')] -[2024-07-05 11:58:46,841][47609] Updated weights for policy 0, policy_version 16471 (0.0008) -[2024-07-05 11:58:48,541][47609] Updated weights for policy 0, policy_version 16481 (0.0010) -[2024-07-05 11:58:50,248][47609] Updated weights for policy 0, policy_version 16491 (0.0009) -[2024-07-05 11:58:51,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 47957.9). Total num frames: 115146752. Throughput: 0: 12100.3. Samples: 3758328. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:58:51,401][25826] Avg episode reward: [(0, '45.777')] -[2024-07-05 11:58:51,922][47609] Updated weights for policy 0, policy_version 16501 (0.0008) -[2024-07-05 11:58:53,605][47609] Updated weights for policy 0, policy_version 16511 (0.0008) -[2024-07-05 11:58:55,310][47609] Updated weights for policy 0, policy_version 16521 (0.0008) -[2024-07-05 11:58:56,400][25826] Fps is (10 sec: 48332.4, 60 sec: 48332.8, 300 sec: 47985.7). Total num frames: 115384320. Throughput: 0: 12094.9. Samples: 3830484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:58:56,401][25826] Avg episode reward: [(0, '49.054')] -[2024-07-05 11:58:56,965][47609] Updated weights for policy 0, policy_version 16531 (0.0010) -[2024-07-05 11:58:58,647][47609] Updated weights for policy 0, policy_version 16541 (0.0008) -[2024-07-05 11:59:00,358][47609] Updated weights for policy 0, policy_version 16551 (0.0010) -[2024-07-05 11:59:01,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48469.4, 300 sec: 48013.5). Total num frames: 115630080. Throughput: 0: 12103.6. Samples: 3903480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:59:01,401][25826] Avg episode reward: [(0, '49.043')] -[2024-07-05 11:59:01,414][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000016557_115630080.pth... -[2024-07-05 11:59:01,501][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000015144_104054784.pth -[2024-07-05 11:59:02,056][47609] Updated weights for policy 0, policy_version 16561 (0.0007) -[2024-07-05 11:59:03,779][47609] Updated weights for policy 0, policy_version 16571 (0.0010) -[2024-07-05 11:59:05,469][47609] Updated weights for policy 0, policy_version 16581 (0.0008) -[2024-07-05 11:59:06,401][25826] Fps is (10 sec: 48328.0, 60 sec: 48332.6, 300 sec: 48096.6). Total num frames: 115867648. Throughput: 0: 12083.7. Samples: 3939476. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:59:06,402][25826] Avg episode reward: [(0, '45.300')] -[2024-07-05 11:59:07,122][47609] Updated weights for policy 0, policy_version 16591 (0.0009) -[2024-07-05 11:59:08,804][47609] Updated weights for policy 0, policy_version 16601 (0.0008) -[2024-07-05 11:59:10,494][47609] Updated weights for policy 0, policy_version 16611 (0.0008) -[2024-07-05 11:59:11,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48469.4, 300 sec: 48124.5). Total num frames: 116113408. Throughput: 0: 12088.3. Samples: 4012020. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 11:59:11,401][25826] Avg episode reward: [(0, '49.799')] -[2024-07-05 11:59:12,175][47609] Updated weights for policy 0, policy_version 16621 (0.0011) -[2024-07-05 11:59:13,883][47609] Updated weights for policy 0, policy_version 16631 (0.0008) -[2024-07-05 11:59:15,621][47609] Updated weights for policy 0, policy_version 16641 (0.0008) -[2024-07-05 11:59:16,400][25826] Fps is (10 sec: 48337.9, 60 sec: 48332.8, 300 sec: 48124.5). Total num frames: 116350976. Throughput: 0: 12099.4. Samples: 4084920. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:59:16,401][25826] Avg episode reward: [(0, '48.876')] -[2024-07-05 11:59:17,283][47609] Updated weights for policy 0, policy_version 16651 (0.0008) -[2024-07-05 11:59:19,003][47609] Updated weights for policy 0, policy_version 16661 (0.0007) -[2024-07-05 11:59:20,666][47609] Updated weights for policy 0, policy_version 16671 (0.0007) -[2024-07-05 11:59:21,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48469.5, 300 sec: 48152.3). Total num frames: 116596736. Throughput: 0: 12099.6. Samples: 4121292. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:59:21,401][25826] Avg episode reward: [(0, '46.642')] -[2024-07-05 11:59:22,331][47609] Updated weights for policy 0, policy_version 16681 (0.0010) -[2024-07-05 11:59:23,994][47609] Updated weights for policy 0, policy_version 16691 (0.0010) -[2024-07-05 11:59:25,690][47609] Updated weights for policy 0, policy_version 16701 (0.0007) -[2024-07-05 11:59:26,400][25826] Fps is (10 sec: 49152.1, 60 sec: 48469.4, 300 sec: 48207.9). Total num frames: 116842496. Throughput: 0: 12108.0. Samples: 4194516. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:59:26,401][25826] Avg episode reward: [(0, '43.695')] -[2024-07-05 11:59:27,409][47609] Updated weights for policy 0, policy_version 16711 (0.0008) -[2024-07-05 11:59:29,087][47609] Updated weights for policy 0, policy_version 16721 (0.0008) -[2024-07-05 11:59:30,785][47609] Updated weights for policy 0, policy_version 16731 (0.0010) -[2024-07-05 11:59:31,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48180.1). Total num frames: 117080064. Throughput: 0: 12108.3. Samples: 4266928. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:59:31,401][25826] Avg episode reward: [(0, '45.352')] -[2024-07-05 11:59:32,461][47609] Updated weights for policy 0, policy_version 16741 (0.0008) -[2024-07-05 11:59:34,166][47609] Updated weights for policy 0, policy_version 16751 (0.0008) -[2024-07-05 11:59:35,879][47609] Updated weights for policy 0, policy_version 16761 (0.0008) -[2024-07-05 11:59:36,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48235.6). Total num frames: 117325824. Throughput: 0: 12112.8. Samples: 4303404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:59:36,401][25826] Avg episode reward: [(0, '45.828')] -[2024-07-05 11:59:37,554][47609] Updated weights for policy 0, policy_version 16771 (0.0008) -[2024-07-05 11:59:39,273][47609] Updated weights for policy 0, policy_version 16781 (0.0010) -[2024-07-05 11:59:40,954][47609] Updated weights for policy 0, policy_version 16791 (0.0010) -[2024-07-05 11:59:41,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 117563392. Throughput: 0: 12123.9. Samples: 4376060. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:59:41,401][25826] Avg episode reward: [(0, '49.866')] -[2024-07-05 11:59:42,633][47609] Updated weights for policy 0, policy_version 16801 (0.0009) -[2024-07-05 11:59:44,316][47609] Updated weights for policy 0, policy_version 16811 (0.0007) -[2024-07-05 11:59:46,029][47609] Updated weights for policy 0, policy_version 16821 (0.0010) -[2024-07-05 11:59:46,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48263.4). Total num frames: 117809152. Throughput: 0: 12113.7. Samples: 4448596. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 11:59:46,401][25826] Avg episode reward: [(0, '46.486')] -[2024-07-05 11:59:47,694][47609] Updated weights for policy 0, policy_version 16831 (0.0008) -[2024-07-05 11:59:49,425][47609] Updated weights for policy 0, policy_version 16841 (0.0008) -[2024-07-05 11:59:51,091][47609] Updated weights for policy 0, policy_version 16851 (0.0008) -[2024-07-05 11:59:51,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 118046720. Throughput: 0: 12115.3. Samples: 4484652. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:59:51,401][25826] Avg episode reward: [(0, '48.265')] -[2024-07-05 11:59:52,834][47609] Updated weights for policy 0, policy_version 16861 (0.0008) -[2024-07-05 11:59:54,494][47609] Updated weights for policy 0, policy_version 16871 (0.0008) -[2024-07-05 11:59:56,214][47609] Updated weights for policy 0, policy_version 16881 (0.0010) -[2024-07-05 11:59:56,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 48291.1). Total num frames: 118292480. Throughput: 0: 12114.1. Samples: 4557156. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 11:59:56,401][25826] Avg episode reward: [(0, '49.096')] -[2024-07-05 11:59:57,893][47609] Updated weights for policy 0, policy_version 16891 (0.0008) -[2024-07-05 11:59:59,616][47609] Updated weights for policy 0, policy_version 16901 (0.0007) -[2024-07-05 12:00:01,288][47609] Updated weights for policy 0, policy_version 16911 (0.0008) -[2024-07-05 12:00:01,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.7, 300 sec: 48263.4). Total num frames: 118530048. Throughput: 0: 12099.4. Samples: 4629392. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:00:01,401][25826] Avg episode reward: [(0, '46.860')] -[2024-07-05 12:00:03,011][47609] Updated weights for policy 0, policy_version 16921 (0.0008) -[2024-07-05 12:00:04,697][47609] Updated weights for policy 0, policy_version 16931 (0.0007) -[2024-07-05 12:00:06,384][47609] Updated weights for policy 0, policy_version 16941 (0.0007) -[2024-07-05 12:00:06,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48470.2, 300 sec: 48318.9). Total num frames: 118775808. Throughput: 0: 12097.5. Samples: 4665680. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:00:06,401][25826] Avg episode reward: [(0, '50.559')] -[2024-07-05 12:00:08,075][47609] Updated weights for policy 0, policy_version 16951 (0.0008) -[2024-07-05 12:00:09,765][47609] Updated weights for policy 0, policy_version 16961 (0.0008) -[2024-07-05 12:00:11,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 119013376. Throughput: 0: 12086.1. Samples: 4738392. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:00:11,401][25826] Avg episode reward: [(0, '46.820')] -[2024-07-05 12:00:11,457][47609] Updated weights for policy 0, policy_version 16971 (0.0008) -[2024-07-05 12:00:13,165][47609] Updated weights for policy 0, policy_version 16981 (0.0007) -[2024-07-05 12:00:14,841][47609] Updated weights for policy 0, policy_version 16991 (0.0008) -[2024-07-05 12:00:16,400][25826] Fps is (10 sec: 48332.1, 60 sec: 48469.2, 300 sec: 48318.9). Total num frames: 119259136. Throughput: 0: 12083.1. Samples: 4810668. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:00:16,401][25826] Avg episode reward: [(0, '48.962')] -[2024-07-05 12:00:16,551][47609] Updated weights for policy 0, policy_version 17001 (0.0007) -[2024-07-05 12:00:18,258][47609] Updated weights for policy 0, policy_version 17011 (0.0012) -[2024-07-05 12:00:19,968][47609] Updated weights for policy 0, policy_version 17021 (0.0008) -[2024-07-05 12:00:21,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 119496704. Throughput: 0: 12077.8. Samples: 4846908. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:00:21,401][25826] Avg episode reward: [(0, '48.499')] -[2024-07-05 12:00:21,634][47609] Updated weights for policy 0, policy_version 17031 (0.0008) -[2024-07-05 12:00:23,323][47609] Updated weights for policy 0, policy_version 17041 (0.0011) -[2024-07-05 12:00:24,994][47609] Updated weights for policy 0, policy_version 17051 (0.0011) -[2024-07-05 12:00:26,400][25826] Fps is (10 sec: 48332.5, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 119742464. Throughput: 0: 12078.3. Samples: 4919588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:00:26,401][25826] Avg episode reward: [(0, '46.598')] -[2024-07-05 12:00:26,727][47609] Updated weights for policy 0, policy_version 17061 (0.0007) -[2024-07-05 12:00:28,454][47609] Updated weights for policy 0, policy_version 17071 (0.0010) -[2024-07-05 12:00:30,163][47609] Updated weights for policy 0, policy_version 17081 (0.0008) -[2024-07-05 12:00:31,400][25826] Fps is (10 sec: 48332.2, 60 sec: 48332.7, 300 sec: 48318.9). Total num frames: 119980032. Throughput: 0: 12065.6. Samples: 4991552. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:00:31,401][25826] Avg episode reward: [(0, '50.511')] -[2024-07-05 12:00:31,841][47609] Updated weights for policy 0, policy_version 17091 (0.0008) -[2024-07-05 12:00:33,530][47609] Updated weights for policy 0, policy_version 17101 (0.0008) -[2024-07-05 12:00:35,215][47609] Updated weights for policy 0, policy_version 17111 (0.0008) -[2024-07-05 12:00:36,400][25826] Fps is (10 sec: 48333.6, 60 sec: 48332.8, 300 sec: 48346.7). Total num frames: 120225792. Throughput: 0: 12076.2. Samples: 5028080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:00:36,401][25826] Avg episode reward: [(0, '49.026')] -[2024-07-05 12:00:36,894][47609] Updated weights for policy 0, policy_version 17121 (0.0008) -[2024-07-05 12:00:38,598][47609] Updated weights for policy 0, policy_version 17131 (0.0010) -[2024-07-05 12:00:40,282][47609] Updated weights for policy 0, policy_version 17141 (0.0008) -[2024-07-05 12:00:41,400][25826] Fps is (10 sec: 48333.6, 60 sec: 48332.7, 300 sec: 48346.7). Total num frames: 120463360. Throughput: 0: 12074.4. Samples: 5100504. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:00:41,401][25826] Avg episode reward: [(0, '47.921')] -[2024-07-05 12:00:41,977][47609] Updated weights for policy 0, policy_version 17151 (0.0008) -[2024-07-05 12:00:43,674][47609] Updated weights for policy 0, policy_version 17161 (0.0008) -[2024-07-05 12:00:45,422][47609] Updated weights for policy 0, policy_version 17171 (0.0007) -[2024-07-05 12:00:46,400][25826] Fps is (10 sec: 47513.3, 60 sec: 48196.2, 300 sec: 48346.7). Total num frames: 120700928. Throughput: 0: 12078.3. Samples: 5172916. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:00:46,401][25826] Avg episode reward: [(0, '47.727')] -[2024-07-05 12:00:47,101][47609] Updated weights for policy 0, policy_version 17181 (0.0011) -[2024-07-05 12:00:48,797][47609] Updated weights for policy 0, policy_version 17191 (0.0007) -[2024-07-05 12:00:50,491][47609] Updated weights for policy 0, policy_version 17201 (0.0008) -[2024-07-05 12:00:51,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48374.4). Total num frames: 120946688. Throughput: 0: 12069.1. Samples: 5208792. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:00:51,401][25826] Avg episode reward: [(0, '48.049')] -[2024-07-05 12:00:52,165][47609] Updated weights for policy 0, policy_version 17211 (0.0012) -[2024-07-05 12:00:53,860][47609] Updated weights for policy 0, policy_version 17221 (0.0007) -[2024-07-05 12:00:55,570][47609] Updated weights for policy 0, policy_version 17231 (0.0008) -[2024-07-05 12:00:56,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48374.4). Total num frames: 121184256. Throughput: 0: 12069.9. Samples: 5281536. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:00:56,401][25826] Avg episode reward: [(0, '45.493')] -[2024-07-05 12:00:57,295][47609] Updated weights for policy 0, policy_version 17241 (0.0008) -[2024-07-05 12:00:59,024][47609] Updated weights for policy 0, policy_version 17251 (0.0008) -[2024-07-05 12:01:00,721][47609] Updated weights for policy 0, policy_version 17261 (0.0008) -[2024-07-05 12:01:01,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48402.2). Total num frames: 121430016. Throughput: 0: 12064.9. Samples: 5353588. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:01:01,401][25826] Avg episode reward: [(0, '46.366')] -[2024-07-05 12:01:01,413][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000017265_121430016.pth... -[2024-07-05 12:01:01,494][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000015848_109821952.pth -[2024-07-05 12:01:02,426][47609] Updated weights for policy 0, policy_version 17271 (0.0007) -[2024-07-05 12:01:04,093][47609] Updated weights for policy 0, policy_version 17281 (0.0008) -[2024-07-05 12:01:05,774][47609] Updated weights for policy 0, policy_version 17291 (0.0010) -[2024-07-05 12:01:06,401][25826] Fps is (10 sec: 48328.1, 60 sec: 48195.5, 300 sec: 48374.3). Total num frames: 121667584. Throughput: 0: 12059.9. Samples: 5389612. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:06,402][25826] Avg episode reward: [(0, '48.665')] -[2024-07-05 12:01:07,448][47609] Updated weights for policy 0, policy_version 17301 (0.0008) -[2024-07-05 12:01:09,181][47609] Updated weights for policy 0, policy_version 17311 (0.0008) -[2024-07-05 12:01:10,901][47609] Updated weights for policy 0, policy_version 17321 (0.0008) -[2024-07-05 12:01:11,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48374.4). Total num frames: 121913344. Throughput: 0: 12058.4. Samples: 5462216. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:11,401][25826] Avg episode reward: [(0, '48.646')] -[2024-07-05 12:01:12,568][47609] Updated weights for policy 0, policy_version 17331 (0.0009) -[2024-07-05 12:01:14,246][47609] Updated weights for policy 0, policy_version 17341 (0.0009) -[2024-07-05 12:01:15,957][47609] Updated weights for policy 0, policy_version 17351 (0.0008) -[2024-07-05 12:01:16,400][25826] Fps is (10 sec: 48337.3, 60 sec: 48196.3, 300 sec: 48374.5). Total num frames: 122150912. Throughput: 0: 12066.0. Samples: 5534520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:16,401][25826] Avg episode reward: [(0, '43.730')] -[2024-07-05 12:01:17,635][47609] Updated weights for policy 0, policy_version 17361 (0.0008) -[2024-07-05 12:01:19,350][47609] Updated weights for policy 0, policy_version 17371 (0.0007) -[2024-07-05 12:01:21,063][47609] Updated weights for policy 0, policy_version 17381 (0.0009) -[2024-07-05 12:01:21,400][25826] Fps is (10 sec: 47513.5, 60 sec: 48196.3, 300 sec: 48346.7). Total num frames: 122388480. Throughput: 0: 12059.3. Samples: 5570748. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:21,401][25826] Avg episode reward: [(0, '46.500')] -[2024-07-05 12:01:22,760][47609] Updated weights for policy 0, policy_version 17391 (0.0007) -[2024-07-05 12:01:24,430][47609] Updated weights for policy 0, policy_version 17401 (0.0008) -[2024-07-05 12:01:26,146][47609] Updated weights for policy 0, policy_version 17411 (0.0007) -[2024-07-05 12:01:26,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48196.4, 300 sec: 48374.5). Total num frames: 122634240. Throughput: 0: 12059.1. Samples: 5643164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:26,401][25826] Avg episode reward: [(0, '48.971')] -[2024-07-05 12:01:27,900][47609] Updated weights for policy 0, policy_version 17421 (0.0008) -[2024-07-05 12:01:29,560][47609] Updated weights for policy 0, policy_version 17431 (0.0008) -[2024-07-05 12:01:31,252][47609] Updated weights for policy 0, policy_version 17441 (0.0009) -[2024-07-05 12:01:31,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.4, 300 sec: 48346.7). Total num frames: 122871808. Throughput: 0: 12057.3. Samples: 5715496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:31,401][25826] Avg episode reward: [(0, '48.182')] -[2024-07-05 12:01:32,937][47609] Updated weights for policy 0, policy_version 17451 (0.0008) -[2024-07-05 12:01:34,638][47609] Updated weights for policy 0, policy_version 17461 (0.0007) -[2024-07-05 12:01:36,335][47609] Updated weights for policy 0, policy_version 17471 (0.0007) -[2024-07-05 12:01:36,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48374.5). Total num frames: 123117568. Throughput: 0: 12069.0. Samples: 5751896. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:01:36,401][25826] Avg episode reward: [(0, '45.543')] -[2024-07-05 12:01:38,032][47609] Updated weights for policy 0, policy_version 17481 (0.0008) -[2024-07-05 12:01:39,730][47609] Updated weights for policy 0, policy_version 17491 (0.0008) -[2024-07-05 12:01:41,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48346.7). Total num frames: 123355136. Throughput: 0: 12058.2. Samples: 5824156. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:01:41,401][25826] Avg episode reward: [(0, '48.585')] -[2024-07-05 12:01:41,422][47609] Updated weights for policy 0, policy_version 17501 (0.0007) -[2024-07-05 12:01:43,141][47609] Updated weights for policy 0, policy_version 17511 (0.0008) -[2024-07-05 12:01:44,859][47609] Updated weights for policy 0, policy_version 17521 (0.0008) -[2024-07-05 12:01:46,400][25826] Fps is (10 sec: 48332.2, 60 sec: 48332.7, 300 sec: 48374.4). Total num frames: 123600896. Throughput: 0: 12061.9. Samples: 5896376. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:01:46,401][25826] Avg episode reward: [(0, '46.208')] -[2024-07-05 12:01:46,568][47609] Updated weights for policy 0, policy_version 17531 (0.0008) -[2024-07-05 12:01:48,270][47609] Updated weights for policy 0, policy_version 17541 (0.0008) -[2024-07-05 12:01:49,949][47609] Updated weights for policy 0, policy_version 17551 (0.0009) -[2024-07-05 12:01:51,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48346.7). Total num frames: 123838464. Throughput: 0: 12058.5. Samples: 5932232. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:01:51,401][25826] Avg episode reward: [(0, '50.190')] -[2024-07-05 12:01:51,655][47609] Updated weights for policy 0, policy_version 17561 (0.0008) -[2024-07-05 12:01:53,344][47609] Updated weights for policy 0, policy_version 17571 (0.0007) -[2024-07-05 12:01:55,073][47609] Updated weights for policy 0, policy_version 17581 (0.0007) -[2024-07-05 12:01:56,400][25826] Fps is (10 sec: 47514.3, 60 sec: 48196.3, 300 sec: 48318.9). Total num frames: 124076032. Throughput: 0: 12052.8. Samples: 6004592. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:01:56,401][25826] Avg episode reward: [(0, '48.897')] -[2024-07-05 12:01:56,753][47609] Updated weights for policy 0, policy_version 17591 (0.0008) -[2024-07-05 12:01:58,436][47609] Updated weights for policy 0, policy_version 17601 (0.0010) -[2024-07-05 12:02:00,144][47609] Updated weights for policy 0, policy_version 17611 (0.0008) -[2024-07-05 12:02:01,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48346.7). Total num frames: 124321792. Throughput: 0: 12054.1. Samples: 6076952. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:02:01,401][25826] Avg episode reward: [(0, '48.542')] -[2024-07-05 12:02:01,827][47609] Updated weights for policy 0, policy_version 17621 (0.0008) -[2024-07-05 12:02:03,518][47609] Updated weights for policy 0, policy_version 17631 (0.0008) -[2024-07-05 12:02:05,233][47609] Updated weights for policy 0, policy_version 17641 (0.0008) -[2024-07-05 12:02:06,400][25826] Fps is (10 sec: 49151.8, 60 sec: 48333.6, 300 sec: 48346.7). Total num frames: 124567552. Throughput: 0: 12054.1. Samples: 6113180. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:02:06,401][25826] Avg episode reward: [(0, '46.479')] -[2024-07-05 12:02:06,917][47609] Updated weights for policy 0, policy_version 17651 (0.0008) -[2024-07-05 12:02:08,624][47609] Updated weights for policy 0, policy_version 17661 (0.0007) -[2024-07-05 12:02:10,338][47609] Updated weights for policy 0, policy_version 17671 (0.0009) -[2024-07-05 12:02:11,400][25826] Fps is (10 sec: 48331.8, 60 sec: 48196.2, 300 sec: 48346.7). Total num frames: 124805120. Throughput: 0: 12050.3. Samples: 6185428. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:02:11,401][25826] Avg episode reward: [(0, '48.592')] -[2024-07-05 12:02:12,034][47609] Updated weights for policy 0, policy_version 17681 (0.0008) -[2024-07-05 12:02:13,741][47609] Updated weights for policy 0, policy_version 17691 (0.0007) -[2024-07-05 12:02:15,404][47609] Updated weights for policy 0, policy_version 17701 (0.0008) -[2024-07-05 12:02:16,400][25826] Fps is (10 sec: 47514.0, 60 sec: 48196.4, 300 sec: 48318.9). Total num frames: 125042688. Throughput: 0: 12057.1. Samples: 6258064. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:02:16,401][25826] Avg episode reward: [(0, '46.149')] -[2024-07-05 12:02:17,124][47609] Updated weights for policy 0, policy_version 17711 (0.0008) -[2024-07-05 12:02:18,829][47609] Updated weights for policy 0, policy_version 17721 (0.0008) -[2024-07-05 12:02:20,540][47609] Updated weights for policy 0, policy_version 17731 (0.0008) -[2024-07-05 12:02:21,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 125288448. Throughput: 0: 12054.9. Samples: 6294368. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:02:21,401][25826] Avg episode reward: [(0, '46.596')] -[2024-07-05 12:02:22,185][47609] Updated weights for policy 0, policy_version 17741 (0.0008) -[2024-07-05 12:02:23,862][47609] Updated weights for policy 0, policy_version 17751 (0.0007) -[2024-07-05 12:02:25,572][47609] Updated weights for policy 0, policy_version 17761 (0.0008) -[2024-07-05 12:02:26,400][25826] Fps is (10 sec: 48332.5, 60 sec: 48196.3, 300 sec: 48319.0). Total num frames: 125526016. Throughput: 0: 12057.4. Samples: 6366740. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:02:26,401][25826] Avg episode reward: [(0, '43.975')] -[2024-07-05 12:02:27,257][47609] Updated weights for policy 0, policy_version 17771 (0.0007) -[2024-07-05 12:02:28,967][47609] Updated weights for policy 0, policy_version 17781 (0.0008) -[2024-07-05 12:02:30,684][47609] Updated weights for policy 0, policy_version 17791 (0.0008) -[2024-07-05 12:02:31,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 125771776. Throughput: 0: 12060.7. Samples: 6439108. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:02:31,401][25826] Avg episode reward: [(0, '45.791')] -[2024-07-05 12:02:32,393][47609] Updated weights for policy 0, policy_version 17801 (0.0008) -[2024-07-05 12:02:34,095][47609] Updated weights for policy 0, policy_version 17811 (0.0007) -[2024-07-05 12:02:35,785][47609] Updated weights for policy 0, policy_version 17821 (0.0008) -[2024-07-05 12:02:36,401][25826] Fps is (10 sec: 48329.2, 60 sec: 48195.7, 300 sec: 48318.8). Total num frames: 126009344. Throughput: 0: 12065.7. Samples: 6475196. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:02:36,402][25826] Avg episode reward: [(0, '45.542')] -[2024-07-05 12:02:37,438][47609] Updated weights for policy 0, policy_version 17831 (0.0008) -[2024-07-05 12:02:39,124][47609] Updated weights for policy 0, policy_version 17841 (0.0007) -[2024-07-05 12:02:40,831][47609] Updated weights for policy 0, policy_version 17851 (0.0008) -[2024-07-05 12:02:41,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 126255104. Throughput: 0: 12070.7. Samples: 6547772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:02:41,401][25826] Avg episode reward: [(0, '49.884')] -[2024-07-05 12:02:42,543][47609] Updated weights for policy 0, policy_version 17861 (0.0008) -[2024-07-05 12:02:44,253][47609] Updated weights for policy 0, policy_version 17871 (0.0011) -[2024-07-05 12:02:45,962][47609] Updated weights for policy 0, policy_version 17881 (0.0008) -[2024-07-05 12:02:46,400][25826] Fps is (10 sec: 48336.0, 60 sec: 48196.3, 300 sec: 48318.9). Total num frames: 126492672. Throughput: 0: 12077.9. Samples: 6620460. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:02:46,402][25826] Avg episode reward: [(0, '46.734')] -[2024-07-05 12:02:47,604][47609] Updated weights for policy 0, policy_version 17891 (0.0008) -[2024-07-05 12:02:49,318][47609] Updated weights for policy 0, policy_version 17901 (0.0008) -[2024-07-05 12:02:51,000][47609] Updated weights for policy 0, policy_version 17911 (0.0010) -[2024-07-05 12:02:51,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 126738432. Throughput: 0: 12078.1. Samples: 6656692. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:02:51,401][25826] Avg episode reward: [(0, '47.085')] -[2024-07-05 12:02:52,738][47609] Updated weights for policy 0, policy_version 17921 (0.0007) -[2024-07-05 12:02:54,378][47609] Updated weights for policy 0, policy_version 17931 (0.0008) -[2024-07-05 12:02:56,074][47609] Updated weights for policy 0, policy_version 17941 (0.0008) -[2024-07-05 12:02:56,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48332.7, 300 sec: 48318.9). Total num frames: 126976000. Throughput: 0: 12090.9. Samples: 6729516. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:02:56,401][25826] Avg episode reward: [(0, '49.484')] -[2024-07-05 12:02:57,776][47609] Updated weights for policy 0, policy_version 17951 (0.0008) -[2024-07-05 12:02:59,476][47609] Updated weights for policy 0, policy_version 17961 (0.0008) -[2024-07-05 12:03:01,166][47609] Updated weights for policy 0, policy_version 17971 (0.0010) -[2024-07-05 12:03:01,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48319.0). Total num frames: 127221760. Throughput: 0: 12080.2. Samples: 6801672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:03:01,401][25826] Avg episode reward: [(0, '46.205')] -[2024-07-05 12:03:01,415][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000017972_127221760.pth... -[2024-07-05 12:03:01,493][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000016557_115630080.pth -[2024-07-05 12:03:02,872][47609] Updated weights for policy 0, policy_version 17981 (0.0007) -[2024-07-05 12:03:04,594][47609] Updated weights for policy 0, policy_version 17991 (0.0008) -[2024-07-05 12:03:06,303][47609] Updated weights for policy 0, policy_version 18001 (0.0010) -[2024-07-05 12:03:06,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48196.2, 300 sec: 48318.9). Total num frames: 127459328. Throughput: 0: 12077.3. Samples: 6837848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:03:06,401][25826] Avg episode reward: [(0, '49.723')] -[2024-07-05 12:03:07,966][47609] Updated weights for policy 0, policy_version 18011 (0.0008) -[2024-07-05 12:03:09,693][47609] Updated weights for policy 0, policy_version 18021 (0.0008) -[2024-07-05 12:03:11,371][47609] Updated weights for policy 0, policy_version 18031 (0.0008) -[2024-07-05 12:03:11,400][25826] Fps is (10 sec: 48332.5, 60 sec: 48332.9, 300 sec: 48318.9). Total num frames: 127705088. Throughput: 0: 12077.9. Samples: 6910248. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:03:11,401][25826] Avg episode reward: [(0, '48.051')] -[2024-07-05 12:03:13,021][47609] Updated weights for policy 0, policy_version 18041 (0.0008) -[2024-07-05 12:03:14,715][47609] Updated weights for policy 0, policy_version 18051 (0.0008) -[2024-07-05 12:03:16,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48319.0). Total num frames: 127942656. Throughput: 0: 12077.3. Samples: 6982584. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:03:16,401][25826] Avg episode reward: [(0, '49.868')] -[2024-07-05 12:03:16,449][47609] Updated weights for policy 0, policy_version 18061 (0.0008) -[2024-07-05 12:03:18,111][47609] Updated weights for policy 0, policy_version 18071 (0.0008) -[2024-07-05 12:03:19,809][47609] Updated weights for policy 0, policy_version 18081 (0.0008) -[2024-07-05 12:03:21,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 128188416. Throughput: 0: 12081.7. Samples: 7018864. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:03:21,401][25826] Avg episode reward: [(0, '51.449')] -[2024-07-05 12:03:21,537][47609] Updated weights for policy 0, policy_version 18091 (0.0008) -[2024-07-05 12:03:23,214][47609] Updated weights for policy 0, policy_version 18101 (0.0008) -[2024-07-05 12:03:24,937][47609] Updated weights for policy 0, policy_version 18111 (0.0009) -[2024-07-05 12:03:26,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 128425984. Throughput: 0: 12076.8. Samples: 7091228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:03:26,401][25826] Avg episode reward: [(0, '47.447')] -[2024-07-05 12:03:26,681][47609] Updated weights for policy 0, policy_version 18121 (0.0008) -[2024-07-05 12:03:28,338][47609] Updated weights for policy 0, policy_version 18131 (0.0010) -[2024-07-05 12:03:30,033][47609] Updated weights for policy 0, policy_version 18141 (0.0010) -[2024-07-05 12:03:31,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 128671744. Throughput: 0: 12068.4. Samples: 7163536. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:03:31,401][25826] Avg episode reward: [(0, '49.436')] -[2024-07-05 12:03:31,737][47609] Updated weights for policy 0, policy_version 18151 (0.0008) -[2024-07-05 12:03:33,469][47609] Updated weights for policy 0, policy_version 18161 (0.0010) -[2024-07-05 12:03:35,175][47609] Updated weights for policy 0, policy_version 18171 (0.0017) -[2024-07-05 12:03:36,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48333.4, 300 sec: 48291.2). Total num frames: 128909312. Throughput: 0: 12060.1. Samples: 7199396. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 12:03:36,401][25826] Avg episode reward: [(0, '47.208')] -[2024-07-05 12:03:36,874][47609] Updated weights for policy 0, policy_version 18181 (0.0007) -[2024-07-05 12:03:38,554][47609] Updated weights for policy 0, policy_version 18191 (0.0008) -[2024-07-05 12:03:40,271][47609] Updated weights for policy 0, policy_version 18201 (0.0011) -[2024-07-05 12:03:41,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 129155072. Throughput: 0: 12056.0. Samples: 7272036. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 12:03:41,401][25826] Avg episode reward: [(0, '49.773')] -[2024-07-05 12:03:41,971][47609] Updated weights for policy 0, policy_version 18211 (0.0008) -[2024-07-05 12:03:43,646][47609] Updated weights for policy 0, policy_version 18221 (0.0012) -[2024-07-05 12:03:45,356][47609] Updated weights for policy 0, policy_version 18231 (0.0008) -[2024-07-05 12:03:46,400][25826] Fps is (10 sec: 48331.1, 60 sec: 48332.6, 300 sec: 48291.1). Total num frames: 129392640. Throughput: 0: 12060.5. Samples: 7344400. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:03:46,401][25826] Avg episode reward: [(0, '50.050')] -[2024-07-05 12:03:47,046][47609] Updated weights for policy 0, policy_version 18241 (0.0009) -[2024-07-05 12:03:48,701][47609] Updated weights for policy 0, policy_version 18251 (0.0008) -[2024-07-05 12:03:50,372][47609] Updated weights for policy 0, policy_version 18261 (0.0007) -[2024-07-05 12:03:51,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 129638400. Throughput: 0: 12077.1. Samples: 7381316. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:03:51,401][25826] Avg episode reward: [(0, '48.117')] -[2024-07-05 12:03:52,067][47609] Updated weights for policy 0, policy_version 18271 (0.0007) -[2024-07-05 12:03:53,800][47609] Updated weights for policy 0, policy_version 18281 (0.0008) -[2024-07-05 12:03:55,498][47609] Updated weights for policy 0, policy_version 18291 (0.0008) -[2024-07-05 12:03:56,400][25826] Fps is (10 sec: 48334.3, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 129875968. Throughput: 0: 12074.1. Samples: 7453580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:03:56,401][25826] Avg episode reward: [(0, '46.252')] -[2024-07-05 12:03:57,167][47609] Updated weights for policy 0, policy_version 18301 (0.0008) -[2024-07-05 12:03:58,833][47609] Updated weights for policy 0, policy_version 18311 (0.0010) -[2024-07-05 12:04:00,553][47609] Updated weights for policy 0, policy_version 18321 (0.0009) -[2024-07-05 12:04:01,400][25826] Fps is (10 sec: 47513.3, 60 sec: 48196.2, 300 sec: 48291.3). Total num frames: 130113536. Throughput: 0: 12085.1. Samples: 7526416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:01,401][25826] Avg episode reward: [(0, '48.242')] -[2024-07-05 12:04:02,220][47609] Updated weights for policy 0, policy_version 18331 (0.0010) -[2024-07-05 12:04:03,919][47609] Updated weights for policy 0, policy_version 18341 (0.0008) -[2024-07-05 12:04:05,645][47609] Updated weights for policy 0, policy_version 18351 (0.0009) -[2024-07-05 12:04:06,400][25826] Fps is (10 sec: 48332.4, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 130359296. Throughput: 0: 12079.8. Samples: 7562456. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:06,401][25826] Avg episode reward: [(0, '48.800')] -[2024-07-05 12:04:07,328][47609] Updated weights for policy 0, policy_version 18361 (0.0008) -[2024-07-05 12:04:09,015][47609] Updated weights for policy 0, policy_version 18371 (0.0008) -[2024-07-05 12:04:10,719][47609] Updated weights for policy 0, policy_version 18381 (0.0008) -[2024-07-05 12:04:11,400][25826] Fps is (10 sec: 49152.7, 60 sec: 48332.9, 300 sec: 48318.9). Total num frames: 130605056. Throughput: 0: 12085.3. Samples: 7635068. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:04:11,401][25826] Avg episode reward: [(0, '49.390')] -[2024-07-05 12:04:12,420][47609] Updated weights for policy 0, policy_version 18391 (0.0007) -[2024-07-05 12:04:14,110][47609] Updated weights for policy 0, policy_version 18401 (0.0008) -[2024-07-05 12:04:15,794][47609] Updated weights for policy 0, policy_version 18411 (0.0007) -[2024-07-05 12:04:16,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 130842624. Throughput: 0: 12076.8. Samples: 7706992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:04:16,401][25826] Avg episode reward: [(0, '47.851')] -[2024-07-05 12:04:17,515][47609] Updated weights for policy 0, policy_version 18421 (0.0007) -[2024-07-05 12:04:19,201][47609] Updated weights for policy 0, policy_version 18431 (0.0007) -[2024-07-05 12:04:20,947][47609] Updated weights for policy 0, policy_version 18441 (0.0008) -[2024-07-05 12:04:21,400][25826] Fps is (10 sec: 47513.0, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 131080192. Throughput: 0: 12088.0. Samples: 7743356. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:04:21,401][25826] Avg episode reward: [(0, '51.052')] -[2024-07-05 12:04:22,647][47609] Updated weights for policy 0, policy_version 18451 (0.0008) -[2024-07-05 12:04:24,374][47609] Updated weights for policy 0, policy_version 18461 (0.0008) -[2024-07-05 12:04:26,047][47609] Updated weights for policy 0, policy_version 18471 (0.0008) -[2024-07-05 12:04:26,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 131325952. Throughput: 0: 12076.6. Samples: 7815484. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:04:26,401][25826] Avg episode reward: [(0, '48.689')] -[2024-07-05 12:04:27,724][47609] Updated weights for policy 0, policy_version 18481 (0.0008) -[2024-07-05 12:04:29,440][47609] Updated weights for policy 0, policy_version 18491 (0.0008) -[2024-07-05 12:04:31,122][47609] Updated weights for policy 0, policy_version 18501 (0.0008) -[2024-07-05 12:04:31,400][25826] Fps is (10 sec: 48332.3, 60 sec: 48196.1, 300 sec: 48263.4). Total num frames: 131563520. Throughput: 0: 12080.8. Samples: 7888036. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:04:31,402][25826] Avg episode reward: [(0, '46.799')] -[2024-07-05 12:04:32,811][47609] Updated weights for policy 0, policy_version 18511 (0.0008) -[2024-07-05 12:04:34,513][47609] Updated weights for policy 0, policy_version 18521 (0.0010) -[2024-07-05 12:04:36,190][47609] Updated weights for policy 0, policy_version 18531 (0.0008) -[2024-07-05 12:04:36,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 131809280. Throughput: 0: 12071.0. Samples: 7924512. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:36,401][25826] Avg episode reward: [(0, '47.501')] -[2024-07-05 12:04:37,911][47609] Updated weights for policy 0, policy_version 18541 (0.0008) -[2024-07-05 12:04:39,587][47609] Updated weights for policy 0, policy_version 18551 (0.0008) -[2024-07-05 12:04:41,237][47609] Updated weights for policy 0, policy_version 18561 (0.0008) -[2024-07-05 12:04:41,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 132046848. Throughput: 0: 12081.7. Samples: 7997256. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:41,401][25826] Avg episode reward: [(0, '47.965')] -[2024-07-05 12:04:42,950][47609] Updated weights for policy 0, policy_version 18571 (0.0009) -[2024-07-05 12:04:44,630][47609] Updated weights for policy 0, policy_version 18581 (0.0008) -[2024-07-05 12:04:46,322][47609] Updated weights for policy 0, policy_version 18591 (0.0008) -[2024-07-05 12:04:46,400][25826] Fps is (10 sec: 48331.9, 60 sec: 48332.9, 300 sec: 48291.1). Total num frames: 132292608. Throughput: 0: 12072.3. Samples: 8069672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:46,401][25826] Avg episode reward: [(0, '44.139')] -[2024-07-05 12:04:48,029][47609] Updated weights for policy 0, policy_version 18601 (0.0007) -[2024-07-05 12:04:49,724][47609] Updated weights for policy 0, policy_version 18611 (0.0008) -[2024-07-05 12:04:51,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 132530176. Throughput: 0: 12079.4. Samples: 8106028. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:51,401][25826] Avg episode reward: [(0, '46.738')] -[2024-07-05 12:04:51,422][47609] Updated weights for policy 0, policy_version 18621 (0.0010) -[2024-07-05 12:04:53,104][47609] Updated weights for policy 0, policy_version 18631 (0.0008) -[2024-07-05 12:04:54,798][47609] Updated weights for policy 0, policy_version 18641 (0.0008) -[2024-07-05 12:04:56,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 132775936. Throughput: 0: 12068.4. Samples: 8178148. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:04:56,401][25826] Avg episode reward: [(0, '44.096')] -[2024-07-05 12:04:56,542][47609] Updated weights for policy 0, policy_version 18651 (0.0011) -[2024-07-05 12:04:58,230][47609] Updated weights for policy 0, policy_version 18661 (0.0009) -[2024-07-05 12:04:59,907][47609] Updated weights for policy 0, policy_version 18671 (0.0008) -[2024-07-05 12:05:01,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48332.9, 300 sec: 48263.4). Total num frames: 133013504. Throughput: 0: 12076.5. Samples: 8250432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:05:01,401][25826] Avg episode reward: [(0, '49.430')] -[2024-07-05 12:05:01,414][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000018679_133013504.pth... -[2024-07-05 12:05:01,489][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000017265_121430016.pth -[2024-07-05 12:05:01,626][47609] Updated weights for policy 0, policy_version 18681 (0.0007) -[2024-07-05 12:05:03,338][47609] Updated weights for policy 0, policy_version 18691 (0.0008) -[2024-07-05 12:05:05,039][47609] Updated weights for policy 0, policy_version 18701 (0.0007) -[2024-07-05 12:05:06,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 133259264. Throughput: 0: 12066.8. Samples: 8286364. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:05:06,401][25826] Avg episode reward: [(0, '49.726')] -[2024-07-05 12:05:06,695][47609] Updated weights for policy 0, policy_version 18711 (0.0009) -[2024-07-05 12:05:08,396][47609] Updated weights for policy 0, policy_version 18721 (0.0009) -[2024-07-05 12:05:10,109][47609] Updated weights for policy 0, policy_version 18731 (0.0008) -[2024-07-05 12:05:11,400][25826] Fps is (10 sec: 48332.5, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 133496832. Throughput: 0: 12076.4. Samples: 8358920. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:05:11,401][25826] Avg episode reward: [(0, '46.797')] -[2024-07-05 12:05:11,856][47609] Updated weights for policy 0, policy_version 18741 (0.0007) -[2024-07-05 12:05:13,582][47609] Updated weights for policy 0, policy_version 18751 (0.0007) -[2024-07-05 12:05:15,274][47609] Updated weights for policy 0, policy_version 18761 (0.0008) -[2024-07-05 12:05:16,400][25826] Fps is (10 sec: 47514.0, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 133734400. Throughput: 0: 12059.8. Samples: 8430724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:05:16,401][25826] Avg episode reward: [(0, '48.856')] -[2024-07-05 12:05:16,939][47609] Updated weights for policy 0, policy_version 18771 (0.0008) -[2024-07-05 12:05:18,628][47609] Updated weights for policy 0, policy_version 18781 (0.0007) -[2024-07-05 12:05:20,349][47609] Updated weights for policy 0, policy_version 18791 (0.0010) -[2024-07-05 12:05:21,401][25826] Fps is (10 sec: 48328.4, 60 sec: 48332.1, 300 sec: 48263.2). Total num frames: 133980160. Throughput: 0: 12058.2. Samples: 8467144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:05:21,402][25826] Avg episode reward: [(0, '49.400')] -[2024-07-05 12:05:22,074][47609] Updated weights for policy 0, policy_version 18801 (0.0008) -[2024-07-05 12:05:23,770][47609] Updated weights for policy 0, policy_version 18811 (0.0008) -[2024-07-05 12:05:25,459][47609] Updated weights for policy 0, policy_version 18821 (0.0008) -[2024-07-05 12:05:26,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 134217728. Throughput: 0: 12050.1. Samples: 8539508. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:05:26,401][25826] Avg episode reward: [(0, '49.014')] -[2024-07-05 12:05:27,121][47609] Updated weights for policy 0, policy_version 18831 (0.0008) -[2024-07-05 12:05:28,801][47609] Updated weights for policy 0, policy_version 18841 (0.0012) -[2024-07-05 12:05:30,498][47609] Updated weights for policy 0, policy_version 18851 (0.0008) -[2024-07-05 12:05:31,400][25826] Fps is (10 sec: 48337.3, 60 sec: 48332.9, 300 sec: 48263.4). Total num frames: 134463488. Throughput: 0: 12048.3. Samples: 8611844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:05:31,401][25826] Avg episode reward: [(0, '50.175')] -[2024-07-05 12:05:32,216][47609] Updated weights for policy 0, policy_version 18861 (0.0008) -[2024-07-05 12:05:33,916][47609] Updated weights for policy 0, policy_version 18871 (0.0007) -[2024-07-05 12:05:35,664][47609] Updated weights for policy 0, policy_version 18881 (0.0008) -[2024-07-05 12:05:36,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 134701056. Throughput: 0: 12046.4. Samples: 8648116. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:05:36,401][25826] Avg episode reward: [(0, '51.615')] -[2024-07-05 12:05:37,335][47609] Updated weights for policy 0, policy_version 18891 (0.0008) -[2024-07-05 12:05:39,033][47609] Updated weights for policy 0, policy_version 18901 (0.0008) -[2024-07-05 12:05:40,733][47609] Updated weights for policy 0, policy_version 18911 (0.0007) -[2024-07-05 12:05:41,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 134946816. Throughput: 0: 12056.6. Samples: 8720696. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:05:41,401][25826] Avg episode reward: [(0, '49.036')] -[2024-07-05 12:05:42,365][47609] Updated weights for policy 0, policy_version 18921 (0.0008) -[2024-07-05 12:05:44,110][47609] Updated weights for policy 0, policy_version 18931 (0.0008) -[2024-07-05 12:05:45,791][47609] Updated weights for policy 0, policy_version 18941 (0.0009) -[2024-07-05 12:05:46,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48196.4, 300 sec: 48263.4). Total num frames: 135184384. Throughput: 0: 12054.2. Samples: 8792872. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:05:46,401][25826] Avg episode reward: [(0, '50.551')] -[2024-07-05 12:05:47,475][47609] Updated weights for policy 0, policy_version 18951 (0.0010) -[2024-07-05 12:05:49,159][47609] Updated weights for policy 0, policy_version 18961 (0.0012) -[2024-07-05 12:05:50,879][47609] Updated weights for policy 0, policy_version 18971 (0.0009) -[2024-07-05 12:05:51,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 135430144. Throughput: 0: 12060.6. Samples: 8829092. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:05:51,401][25826] Avg episode reward: [(0, '49.085')] -[2024-07-05 12:05:52,557][47609] Updated weights for policy 0, policy_version 18981 (0.0008) -[2024-07-05 12:05:54,258][47609] Updated weights for policy 0, policy_version 18991 (0.0008) -[2024-07-05 12:05:55,981][47609] Updated weights for policy 0, policy_version 19001 (0.0008) -[2024-07-05 12:05:56,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 135667712. Throughput: 0: 12057.4. Samples: 8901504. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:05:56,401][25826] Avg episode reward: [(0, '49.810')] -[2024-07-05 12:05:57,706][47609] Updated weights for policy 0, policy_version 19011 (0.0009) -[2024-07-05 12:05:59,435][47609] Updated weights for policy 0, policy_version 19021 (0.0008) -[2024-07-05 12:06:01,118][47609] Updated weights for policy 0, policy_version 19031 (0.0008) -[2024-07-05 12:06:01,400][25826] Fps is (10 sec: 47513.4, 60 sec: 48196.2, 300 sec: 48263.5). Total num frames: 135905280. Throughput: 0: 12063.1. Samples: 8973564. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:06:01,401][25826] Avg episode reward: [(0, '47.679')] -[2024-07-05 12:06:02,810][47609] Updated weights for policy 0, policy_version 19041 (0.0008) -[2024-07-05 12:06:04,544][47609] Updated weights for policy 0, policy_version 19051 (0.0008) -[2024-07-05 12:06:06,213][47609] Updated weights for policy 0, policy_version 19061 (0.0009) -[2024-07-05 12:06:06,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 136151040. Throughput: 0: 12053.9. Samples: 9009560. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:06:06,401][25826] Avg episode reward: [(0, '49.476')] -[2024-07-05 12:06:07,931][47609] Updated weights for policy 0, policy_version 19071 (0.0013) -[2024-07-05 12:06:09,633][47609] Updated weights for policy 0, policy_version 19081 (0.0008) -[2024-07-05 12:06:11,330][47609] Updated weights for policy 0, policy_version 19091 (0.0009) -[2024-07-05 12:06:11,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 136388608. Throughput: 0: 12043.8. Samples: 9081480. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:06:11,401][25826] Avg episode reward: [(0, '49.336')] -[2024-07-05 12:06:13,008][47609] Updated weights for policy 0, policy_version 19101 (0.0007) -[2024-07-05 12:06:14,703][47609] Updated weights for policy 0, policy_version 19111 (0.0010) -[2024-07-05 12:06:16,392][47609] Updated weights for policy 0, policy_version 19121 (0.0008) -[2024-07-05 12:06:16,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 136634368. Throughput: 0: 12049.7. Samples: 9154080. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:06:16,401][25826] Avg episode reward: [(0, '48.225')] -[2024-07-05 12:06:18,120][47609] Updated weights for policy 0, policy_version 19131 (0.0007) -[2024-07-05 12:06:19,844][47609] Updated weights for policy 0, policy_version 19141 (0.0008) -[2024-07-05 12:06:21,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48197.0, 300 sec: 48263.4). Total num frames: 136871936. Throughput: 0: 12045.9. Samples: 9190184. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:06:21,402][25826] Avg episode reward: [(0, '49.206')] -[2024-07-05 12:06:21,551][47609] Updated weights for policy 0, policy_version 19151 (0.0008) -[2024-07-05 12:06:23,236][47609] Updated weights for policy 0, policy_version 19161 (0.0007) -[2024-07-05 12:06:24,927][47609] Updated weights for policy 0, policy_version 19171 (0.0007) -[2024-07-05 12:06:26,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 137117696. Throughput: 0: 12050.1. Samples: 9262952. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:06:26,401][25826] Avg episode reward: [(0, '47.660')] -[2024-07-05 12:06:26,615][47609] Updated weights for policy 0, policy_version 19181 (0.0008) -[2024-07-05 12:06:28,283][47609] Updated weights for policy 0, policy_version 19191 (0.0007) -[2024-07-05 12:06:29,971][47609] Updated weights for policy 0, policy_version 19201 (0.0010) -[2024-07-05 12:06:31,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 137355264. Throughput: 0: 12057.5. Samples: 9335460. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:06:31,401][25826] Avg episode reward: [(0, '48.338')] -[2024-07-05 12:06:31,686][47609] Updated weights for policy 0, policy_version 19211 (0.0008) -[2024-07-05 12:06:33,371][47609] Updated weights for policy 0, policy_version 19221 (0.0007) -[2024-07-05 12:06:35,058][47609] Updated weights for policy 0, policy_version 19231 (0.0008) -[2024-07-05 12:06:36,400][25826] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 137592832. Throughput: 0: 12056.9. Samples: 9371652. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:06:36,401][25826] Avg episode reward: [(0, '47.266')] -[2024-07-05 12:06:36,762][47609] Updated weights for policy 0, policy_version 19241 (0.0007) -[2024-07-05 12:06:38,456][47609] Updated weights for policy 0, policy_version 19251 (0.0008) -[2024-07-05 12:06:40,175][47609] Updated weights for policy 0, policy_version 19261 (0.0007) -[2024-07-05 12:06:41,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 137838592. Throughput: 0: 12045.4. Samples: 9443548. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:06:41,401][25826] Avg episode reward: [(0, '46.670')] -[2024-07-05 12:06:41,898][47609] Updated weights for policy 0, policy_version 19271 (0.0010) -[2024-07-05 12:06:43,638][47609] Updated weights for policy 0, policy_version 19281 (0.0008) -[2024-07-05 12:06:45,316][47609] Updated weights for policy 0, policy_version 19291 (0.0009) -[2024-07-05 12:06:46,400][25826] Fps is (10 sec: 48331.2, 60 sec: 48196.0, 300 sec: 48263.3). Total num frames: 138076160. Throughput: 0: 12050.1. Samples: 9515824. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:06:46,402][25826] Avg episode reward: [(0, '47.793')] -[2024-07-05 12:06:47,011][47609] Updated weights for policy 0, policy_version 19301 (0.0008) -[2024-07-05 12:06:48,695][47609] Updated weights for policy 0, policy_version 19311 (0.0008) -[2024-07-05 12:06:50,386][47609] Updated weights for policy 0, policy_version 19321 (0.0008) -[2024-07-05 12:06:51,400][25826] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 48291.1). Total num frames: 138321920. Throughput: 0: 12054.4. Samples: 9552008. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:06:51,401][25826] Avg episode reward: [(0, '48.363')] -[2024-07-05 12:06:52,095][47609] Updated weights for policy 0, policy_version 19331 (0.0009) -[2024-07-05 12:06:53,777][47609] Updated weights for policy 0, policy_version 19341 (0.0008) -[2024-07-05 12:06:55,452][47609] Updated weights for policy 0, policy_version 19351 (0.0009) -[2024-07-05 12:06:56,400][25826] Fps is (10 sec: 48334.4, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 138559488. Throughput: 0: 12060.6. Samples: 9624208. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:06:56,401][25826] Avg episode reward: [(0, '49.628')] -[2024-07-05 12:06:57,152][47609] Updated weights for policy 0, policy_version 19361 (0.0008) -[2024-07-05 12:06:58,833][47609] Updated weights for policy 0, policy_version 19371 (0.0007) -[2024-07-05 12:07:00,558][47609] Updated weights for policy 0, policy_version 19381 (0.0013) -[2024-07-05 12:07:01,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 138805248. Throughput: 0: 12063.4. Samples: 9696932. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:07:01,401][25826] Avg episode reward: [(0, '48.999')] -[2024-07-05 12:07:01,416][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000019386_138805248.pth... -[2024-07-05 12:07:01,502][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000017972_127221760.pth -[2024-07-05 12:07:02,298][47609] Updated weights for policy 0, policy_version 19391 (0.0009) -[2024-07-05 12:07:03,996][47609] Updated weights for policy 0, policy_version 19401 (0.0009) -[2024-07-05 12:07:05,652][47609] Updated weights for policy 0, policy_version 19411 (0.0008) -[2024-07-05 12:07:06,400][25826] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 139042816. Throughput: 0: 12064.4. Samples: 9733080. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:07:06,401][25826] Avg episode reward: [(0, '48.493')] -[2024-07-05 12:07:07,375][47609] Updated weights for policy 0, policy_version 19421 (0.0008) -[2024-07-05 12:07:09,049][47609] Updated weights for policy 0, policy_version 19431 (0.0008) -[2024-07-05 12:07:10,752][47609] Updated weights for policy 0, policy_version 19441 (0.0008) -[2024-07-05 12:07:11,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 139288576. Throughput: 0: 12050.2. Samples: 9805212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:07:11,401][25826] Avg episode reward: [(0, '47.229')] -[2024-07-05 12:07:12,401][47609] Updated weights for policy 0, policy_version 19451 (0.0007) -[2024-07-05 12:07:14,098][47609] Updated weights for policy 0, policy_version 19461 (0.0008) -[2024-07-05 12:07:15,849][47609] Updated weights for policy 0, policy_version 19471 (0.0008) -[2024-07-05 12:07:16,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 139526144. Throughput: 0: 12057.8. Samples: 9878060. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:07:16,401][25826] Avg episode reward: [(0, '45.477')] -[2024-07-05 12:07:17,570][47609] Updated weights for policy 0, policy_version 19481 (0.0008) -[2024-07-05 12:07:19,283][47609] Updated weights for policy 0, policy_version 19491 (0.0012) -[2024-07-05 12:07:20,952][47609] Updated weights for policy 0, policy_version 19501 (0.0008) -[2024-07-05 12:07:21,400][25826] Fps is (10 sec: 47513.0, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 139763712. Throughput: 0: 12050.4. Samples: 9913920. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:07:21,401][25826] Avg episode reward: [(0, '48.031')] -[2024-07-05 12:07:22,611][47609] Updated weights for policy 0, policy_version 19511 (0.0010) -[2024-07-05 12:07:24,331][47609] Updated weights for policy 0, policy_version 19521 (0.0008) -[2024-07-05 12:07:26,038][47609] Updated weights for policy 0, policy_version 19531 (0.0008) -[2024-07-05 12:07:26,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 140009472. Throughput: 0: 12065.3. Samples: 9986488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:07:26,401][25826] Avg episode reward: [(0, '49.614')] -[2024-07-05 12:07:27,702][47609] Updated weights for policy 0, policy_version 19541 (0.0007) -[2024-07-05 12:07:29,448][47609] Updated weights for policy 0, policy_version 19551 (0.0011) -[2024-07-05 12:07:31,150][47609] Updated weights for policy 0, policy_version 19561 (0.0008) -[2024-07-05 12:07:31,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.2, 300 sec: 48263.5). Total num frames: 140247040. Throughput: 0: 12062.6. Samples: 10058636. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:07:31,401][25826] Avg episode reward: [(0, '48.964')] -[2024-07-05 12:07:32,834][47609] Updated weights for policy 0, policy_version 19571 (0.0007) -[2024-07-05 12:07:34,548][47609] Updated weights for policy 0, policy_version 19581 (0.0010) -[2024-07-05 12:07:36,217][47609] Updated weights for policy 0, policy_version 19591 (0.0008) -[2024-07-05 12:07:36,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 140492800. Throughput: 0: 12066.9. Samples: 10095020. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:07:36,401][25826] Avg episode reward: [(0, '48.986')] -[2024-07-05 12:07:37,920][47609] Updated weights for policy 0, policy_version 19601 (0.0008) -[2024-07-05 12:07:39,596][47609] Updated weights for policy 0, policy_version 19611 (0.0009) -[2024-07-05 12:07:41,285][47609] Updated weights for policy 0, policy_version 19621 (0.0008) -[2024-07-05 12:07:41,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 140730368. Throughput: 0: 12064.2. Samples: 10167096. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:07:41,401][25826] Avg episode reward: [(0, '48.406')] -[2024-07-05 12:07:43,000][47609] Updated weights for policy 0, policy_version 19631 (0.0008) -[2024-07-05 12:07:44,687][47609] Updated weights for policy 0, policy_version 19641 (0.0007) -[2024-07-05 12:07:46,400][25826] Fps is (10 sec: 47513.4, 60 sec: 48196.5, 300 sec: 48235.6). Total num frames: 140967936. Throughput: 0: 12059.0. Samples: 10239588. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:07:46,401][25826] Avg episode reward: [(0, '47.606')] -[2024-07-05 12:07:46,428][47609] Updated weights for policy 0, policy_version 19651 (0.0008) -[2024-07-05 12:07:48,112][47609] Updated weights for policy 0, policy_version 19661 (0.0008) -[2024-07-05 12:07:49,854][47609] Updated weights for policy 0, policy_version 19671 (0.0008) -[2024-07-05 12:07:51,400][25826] Fps is (10 sec: 48332.1, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 141213696. Throughput: 0: 12061.7. Samples: 10275856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:07:51,401][25826] Avg episode reward: [(0, '43.469')] -[2024-07-05 12:07:51,494][47609] Updated weights for policy 0, policy_version 19681 (0.0008) -[2024-07-05 12:07:53,203][47609] Updated weights for policy 0, policy_version 19691 (0.0007) -[2024-07-05 12:07:54,912][47609] Updated weights for policy 0, policy_version 19701 (0.0008) -[2024-07-05 12:07:56,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 141451264. Throughput: 0: 12068.2. Samples: 10348280. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:07:56,401][25826] Avg episode reward: [(0, '46.453')] -[2024-07-05 12:07:56,611][47609] Updated weights for policy 0, policy_version 19711 (0.0008) -[2024-07-05 12:07:58,297][47609] Updated weights for policy 0, policy_version 19721 (0.0007) -[2024-07-05 12:08:00,001][47609] Updated weights for policy 0, policy_version 19731 (0.0011) -[2024-07-05 12:08:01,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 141697024. Throughput: 0: 12049.2. Samples: 10420276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:08:01,401][25826] Avg episode reward: [(0, '49.499')] -[2024-07-05 12:08:01,694][47609] Updated weights for policy 0, policy_version 19741 (0.0007) -[2024-07-05 12:08:03,416][47609] Updated weights for policy 0, policy_version 19751 (0.0008) -[2024-07-05 12:08:05,133][47609] Updated weights for policy 0, policy_version 19761 (0.0008) -[2024-07-05 12:08:06,400][25826] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 141934592. Throughput: 0: 12047.7. Samples: 10456064. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:08:06,401][25826] Avg episode reward: [(0, '47.397')] -[2024-07-05 12:08:06,838][47609] Updated weights for policy 0, policy_version 19771 (0.0007) -[2024-07-05 12:08:08,513][47609] Updated weights for policy 0, policy_version 19781 (0.0007) -[2024-07-05 12:08:10,217][47609] Updated weights for policy 0, policy_version 19791 (0.0008) -[2024-07-05 12:08:11,400][25826] Fps is (10 sec: 47513.8, 60 sec: 48059.7, 300 sec: 48235.6). Total num frames: 142172160. Throughput: 0: 12044.9. Samples: 10528508. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:08:11,401][25826] Avg episode reward: [(0, '50.106')] -[2024-07-05 12:08:11,953][47609] Updated weights for policy 0, policy_version 19801 (0.0008) -[2024-07-05 12:08:13,645][47609] Updated weights for policy 0, policy_version 19811 (0.0008) -[2024-07-05 12:08:15,358][47609] Updated weights for policy 0, policy_version 19821 (0.0007) -[2024-07-05 12:08:16,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 142417920. Throughput: 0: 12046.0. Samples: 10600704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:08:16,401][25826] Avg episode reward: [(0, '49.834')] -[2024-07-05 12:08:17,079][47609] Updated weights for policy 0, policy_version 19831 (0.0008) -[2024-07-05 12:08:18,764][47609] Updated weights for policy 0, policy_version 19841 (0.0009) -[2024-07-05 12:08:20,493][47609] Updated weights for policy 0, policy_version 19851 (0.0008) -[2024-07-05 12:08:21,400][25826] Fps is (10 sec: 48332.3, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 142655488. Throughput: 0: 12032.0. Samples: 10636460. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:08:21,401][25826] Avg episode reward: [(0, '50.258')] -[2024-07-05 12:08:22,162][47609] Updated weights for policy 0, policy_version 19861 (0.0008) -[2024-07-05 12:08:23,799][47609] Updated weights for policy 0, policy_version 19871 (0.0008) -[2024-07-05 12:08:25,504][47609] Updated weights for policy 0, policy_version 19881 (0.0010) -[2024-07-05 12:08:26,400][25826] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 142901248. Throughput: 0: 12038.2. Samples: 10708816. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:08:26,401][25826] Avg episode reward: [(0, '49.282')] -[2024-07-05 12:08:27,222][47609] Updated weights for policy 0, policy_version 19891 (0.0008) -[2024-07-05 12:08:28,918][47609] Updated weights for policy 0, policy_version 19901 (0.0008) -[2024-07-05 12:08:30,659][47609] Updated weights for policy 0, policy_version 19911 (0.0007) -[2024-07-05 12:08:31,400][25826] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 143138816. Throughput: 0: 12036.8. Samples: 10781244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:08:31,401][25826] Avg episode reward: [(0, '49.703')] -[2024-07-05 12:08:32,347][47609] Updated weights for policy 0, policy_version 19921 (0.0008) -[2024-07-05 12:08:34,017][47609] Updated weights for policy 0, policy_version 19931 (0.0009) -[2024-07-05 12:08:35,705][47609] Updated weights for policy 0, policy_version 19941 (0.0010) -[2024-07-05 12:08:36,400][25826] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 143384576. Throughput: 0: 12042.6. Samples: 10817772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:08:36,401][25826] Avg episode reward: [(0, '50.324')] -[2024-07-05 12:08:37,418][47609] Updated weights for policy 0, policy_version 19951 (0.0008) -[2024-07-05 12:08:39,167][47609] Updated weights for policy 0, policy_version 19961 (0.0008) -[2024-07-05 12:08:40,845][47609] Updated weights for policy 0, policy_version 19971 (0.0009) -[2024-07-05 12:08:41,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48235.7). Total num frames: 143622144. Throughput: 0: 12034.0. Samples: 10889808. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:08:41,401][25826] Avg episode reward: [(0, '47.387')] -[2024-07-05 12:08:42,547][47609] Updated weights for policy 0, policy_version 19981 (0.0009) -[2024-07-05 12:08:44,242][47609] Updated weights for policy 0, policy_version 19991 (0.0010) -[2024-07-05 12:08:45,939][47609] Updated weights for policy 0, policy_version 20001 (0.0007) -[2024-07-05 12:08:46,400][25826] Fps is (10 sec: 47512.8, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 143859712. Throughput: 0: 12037.9. Samples: 10961984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:08:46,401][25826] Avg episode reward: [(0, '52.062')] -[2024-07-05 12:08:46,403][47589] Saving new best policy, reward=52.062! -[2024-07-05 12:08:47,616][47609] Updated weights for policy 0, policy_version 20011 (0.0008) -[2024-07-05 12:08:49,350][47609] Updated weights for policy 0, policy_version 20021 (0.0007) -[2024-07-05 12:08:51,079][47609] Updated weights for policy 0, policy_version 20031 (0.0008) -[2024-07-05 12:08:51,400][25826] Fps is (10 sec: 47513.4, 60 sec: 48059.8, 300 sec: 48207.8). Total num frames: 144097280. Throughput: 0: 12042.9. Samples: 10997992. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:08:51,401][25826] Avg episode reward: [(0, '49.710')] -[2024-07-05 12:08:52,761][47609] Updated weights for policy 0, policy_version 20041 (0.0008) -[2024-07-05 12:08:54,473][47609] Updated weights for policy 0, policy_version 20051 (0.0008) -[2024-07-05 12:08:56,140][47609] Updated weights for policy 0, policy_version 20061 (0.0007) -[2024-07-05 12:08:56,400][25826] Fps is (10 sec: 48333.1, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 144343040. Throughput: 0: 12039.1. Samples: 11070268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:08:56,401][25826] Avg episode reward: [(0, '48.707')] -[2024-07-05 12:08:57,867][47609] Updated weights for policy 0, policy_version 20071 (0.0007) -[2024-07-05 12:08:59,553][47609] Updated weights for policy 0, policy_version 20081 (0.0010) -[2024-07-05 12:09:01,232][47609] Updated weights for policy 0, policy_version 20091 (0.0007) -[2024-07-05 12:09:01,400][25826] Fps is (10 sec: 48332.3, 60 sec: 48059.7, 300 sec: 48207.8). Total num frames: 144580608. Throughput: 0: 12036.8. Samples: 11142360. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:09:01,401][25826] Avg episode reward: [(0, '45.857')] -[2024-07-05 12:09:01,404][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020091_144580608.pth... -[2024-07-05 12:09:01,486][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000018679_133013504.pth -[2024-07-05 12:09:02,966][47609] Updated weights for policy 0, policy_version 20101 (0.0008) -[2024-07-05 12:09:04,671][47609] Updated weights for policy 0, policy_version 20111 (0.0009) -[2024-07-05 12:09:06,390][47609] Updated weights for policy 0, policy_version 20121 (0.0008) -[2024-07-05 12:09:06,400][25826] Fps is (10 sec: 48333.4, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 144826368. Throughput: 0: 12044.5. Samples: 11178460. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:06,401][25826] Avg episode reward: [(0, '47.919')] -[2024-07-05 12:09:08,098][47609] Updated weights for policy 0, policy_version 20131 (0.0008) -[2024-07-05 12:09:09,783][47609] Updated weights for policy 0, policy_version 20141 (0.0008) -[2024-07-05 12:09:11,400][25826] Fps is (10 sec: 48333.4, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 145063936. Throughput: 0: 12043.3. Samples: 11250764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:11,401][25826] Avg episode reward: [(0, '48.002')] -[2024-07-05 12:09:11,478][47609] Updated weights for policy 0, policy_version 20151 (0.0007) -[2024-07-05 12:09:13,188][47609] Updated weights for policy 0, policy_version 20161 (0.0008) -[2024-07-05 12:09:14,899][47609] Updated weights for policy 0, policy_version 20171 (0.0008) -[2024-07-05 12:09:16,400][25826] Fps is (10 sec: 47514.0, 60 sec: 48059.8, 300 sec: 48207.9). Total num frames: 145301504. Throughput: 0: 12033.1. Samples: 11322732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:16,401][25826] Avg episode reward: [(0, '50.806')] -[2024-07-05 12:09:16,583][47609] Updated weights for policy 0, policy_version 20181 (0.0007) -[2024-07-05 12:09:18,301][47609] Updated weights for policy 0, policy_version 20191 (0.0008) -[2024-07-05 12:09:20,021][47609] Updated weights for policy 0, policy_version 20201 (0.0008) -[2024-07-05 12:09:21,400][25826] Fps is (10 sec: 47512.6, 60 sec: 48059.7, 300 sec: 48180.0). Total num frames: 145539072. Throughput: 0: 12024.1. Samples: 11358860. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:09:21,401][25826] Avg episode reward: [(0, '48.132')] -[2024-07-05 12:09:21,744][47609] Updated weights for policy 0, policy_version 20211 (0.0008) -[2024-07-05 12:09:23,427][47609] Updated weights for policy 0, policy_version 20221 (0.0008) -[2024-07-05 12:09:25,108][47609] Updated weights for policy 0, policy_version 20231 (0.0007) -[2024-07-05 12:09:26,400][25826] Fps is (10 sec: 48332.4, 60 sec: 48059.8, 300 sec: 48207.9). Total num frames: 145784832. Throughput: 0: 12024.5. Samples: 11430912. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:09:26,401][25826] Avg episode reward: [(0, '49.738')] -[2024-07-05 12:09:26,799][47609] Updated weights for policy 0, policy_version 20241 (0.0008) -[2024-07-05 12:09:28,482][47609] Updated weights for policy 0, policy_version 20251 (0.0007) -[2024-07-05 12:09:30,209][47609] Updated weights for policy 0, policy_version 20261 (0.0008) -[2024-07-05 12:09:31,400][25826] Fps is (10 sec: 49153.4, 60 sec: 48196.4, 300 sec: 48207.9). Total num frames: 146030592. Throughput: 0: 12031.7. Samples: 11503408. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:31,401][25826] Avg episode reward: [(0, '50.718')] -[2024-07-05 12:09:31,893][47609] Updated weights for policy 0, policy_version 20271 (0.0008) -[2024-07-05 12:09:33,578][47609] Updated weights for policy 0, policy_version 20281 (0.0010) -[2024-07-05 12:09:35,313][47609] Updated weights for policy 0, policy_version 20291 (0.0011) -[2024-07-05 12:09:36,400][25826] Fps is (10 sec: 48332.8, 60 sec: 48059.8, 300 sec: 48207.9). Total num frames: 146268160. Throughput: 0: 12035.7. Samples: 11539600. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:36,401][25826] Avg episode reward: [(0, '48.514')] -[2024-07-05 12:09:37,030][47609] Updated weights for policy 0, policy_version 20301 (0.0008) -[2024-07-05 12:09:38,725][47609] Updated weights for policy 0, policy_version 20311 (0.0008) -[2024-07-05 12:09:40,446][47609] Updated weights for policy 0, policy_version 20321 (0.0007) -[2024-07-05 12:09:41,400][25826] Fps is (10 sec: 47512.9, 60 sec: 48059.6, 300 sec: 48180.1). Total num frames: 146505728. Throughput: 0: 12025.7. Samples: 11611424. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:41,401][25826] Avg episode reward: [(0, '48.312')] -[2024-07-05 12:09:42,108][47609] Updated weights for policy 0, policy_version 20331 (0.0008) -[2024-07-05 12:09:43,782][47609] Updated weights for policy 0, policy_version 20341 (0.0007) -[2024-07-05 12:09:45,499][47609] Updated weights for policy 0, policy_version 20351 (0.0011) -[2024-07-05 12:09:46,400][25826] Fps is (10 sec: 48332.3, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 146751488. Throughput: 0: 12033.1. Samples: 11683848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:46,401][25826] Avg episode reward: [(0, '49.374')] -[2024-07-05 12:09:47,217][47609] Updated weights for policy 0, policy_version 20361 (0.0007) -[2024-07-05 12:09:48,890][47609] Updated weights for policy 0, policy_version 20371 (0.0007) -[2024-07-05 12:09:50,617][47609] Updated weights for policy 0, policy_version 20381 (0.0007) -[2024-07-05 12:09:51,400][25826] Fps is (10 sec: 48333.2, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 146989056. Throughput: 0: 12029.3. Samples: 11719776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:51,401][25826] Avg episode reward: [(0, '53.282')] -[2024-07-05 12:09:51,413][47589] Saving new best policy, reward=53.282! -[2024-07-05 12:09:52,337][47609] Updated weights for policy 0, policy_version 20391 (0.0007) -[2024-07-05 12:09:54,030][47609] Updated weights for policy 0, policy_version 20401 (0.0008) -[2024-07-05 12:09:55,742][47609] Updated weights for policy 0, policy_version 20411 (0.0008) -[2024-07-05 12:09:56,400][25826] Fps is (10 sec: 47513.7, 60 sec: 48059.8, 300 sec: 48180.0). Total num frames: 147226624. Throughput: 0: 12033.5. Samples: 11792272. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:09:56,401][25826] Avg episode reward: [(0, '50.364')] -[2024-07-05 12:09:57,434][47609] Updated weights for policy 0, policy_version 20421 (0.0008) -[2024-07-05 12:09:59,161][47609] Updated weights for policy 0, policy_version 20431 (0.0008) -[2024-07-05 12:10:00,815][47609] Updated weights for policy 0, policy_version 20441 (0.0008) -[2024-07-05 12:10:01,400][25826] Fps is (10 sec: 48332.3, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 147472384. Throughput: 0: 12038.5. Samples: 11864468. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:10:01,401][25826] Avg episode reward: [(0, '49.245')] -[2024-07-05 12:10:02,574][47609] Updated weights for policy 0, policy_version 20451 (0.0008) -[2024-07-05 12:10:04,274][47609] Updated weights for policy 0, policy_version 20461 (0.0008) -[2024-07-05 12:10:05,958][47609] Updated weights for policy 0, policy_version 20471 (0.0007) -[2024-07-05 12:10:06,400][25826] Fps is (10 sec: 48333.3, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 147709952. Throughput: 0: 12030.5. Samples: 11900232. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:10:06,401][25826] Avg episode reward: [(0, '49.783')] -[2024-07-05 12:10:07,661][47609] Updated weights for policy 0, policy_version 20481 (0.0008) -[2024-07-05 12:10:09,360][47609] Updated weights for policy 0, policy_version 20491 (0.0009) -[2024-07-05 12:10:11,080][47609] Updated weights for policy 0, policy_version 20501 (0.0007) -[2024-07-05 12:10:11,400][25826] Fps is (10 sec: 47514.2, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 147947520. Throughput: 0: 12039.2. Samples: 11972676. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:10:11,401][25826] Avg episode reward: [(0, '49.822')] -[2024-07-05 12:10:12,770][47609] Updated weights for policy 0, policy_version 20511 (0.0007) -[2024-07-05 12:10:14,480][47609] Updated weights for policy 0, policy_version 20521 (0.0008) -[2024-07-05 12:10:16,164][47609] Updated weights for policy 0, policy_version 20531 (0.0010) -[2024-07-05 12:10:16,400][25826] Fps is (10 sec: 48332.4, 60 sec: 48196.1, 300 sec: 48180.2). Total num frames: 148193280. Throughput: 0: 12022.3. Samples: 12044412. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:10:16,401][25826] Avg episode reward: [(0, '49.984')] -[2024-07-05 12:10:17,866][47609] Updated weights for policy 0, policy_version 20541 (0.0009) -[2024-07-05 12:10:19,556][47609] Updated weights for policy 0, policy_version 20551 (0.0008) -[2024-07-05 12:10:21,231][47609] Updated weights for policy 0, policy_version 20561 (0.0007) -[2024-07-05 12:10:21,400][25826] Fps is (10 sec: 48332.5, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 148430848. Throughput: 0: 12031.4. Samples: 12081012. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:10:21,401][25826] Avg episode reward: [(0, '48.351')] -[2024-07-05 12:10:22,956][47609] Updated weights for policy 0, policy_version 20571 (0.0008) -[2024-07-05 12:10:24,671][47609] Updated weights for policy 0, policy_version 20581 (0.0010) -[2024-07-05 12:10:26,386][47609] Updated weights for policy 0, policy_version 20591 (0.0008) -[2024-07-05 12:10:26,400][25826] Fps is (10 sec: 48332.5, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 148676608. Throughput: 0: 12042.1. Samples: 12153320. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:10:26,401][25826] Avg episode reward: [(0, '49.178')] -[2024-07-05 12:10:28,091][47609] Updated weights for policy 0, policy_version 20601 (0.0008) -[2024-07-05 12:10:29,765][47609] Updated weights for policy 0, policy_version 20611 (0.0007) -[2024-07-05 12:10:31,399][47609] Updated weights for policy 0, policy_version 20621 (0.0010) -[2024-07-05 12:10:31,400][25826] Fps is (10 sec: 49150.9, 60 sec: 48196.0, 300 sec: 48207.8). Total num frames: 148922368. Throughput: 0: 12048.4. Samples: 12226028. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:10:31,401][25826] Avg episode reward: [(0, '45.100')] -[2024-07-05 12:10:33,130][47609] Updated weights for policy 0, policy_version 20631 (0.0011) -[2024-07-05 12:10:34,835][47609] Updated weights for policy 0, policy_version 20641 (0.0009) -[2024-07-05 12:10:36,400][25826] Fps is (10 sec: 47513.5, 60 sec: 48059.6, 300 sec: 48152.3). Total num frames: 149151744. Throughput: 0: 12049.9. Samples: 12262024. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:10:36,401][25826] Avg episode reward: [(0, '48.190')] -[2024-07-05 12:10:36,526][47609] Updated weights for policy 0, policy_version 20651 (0.0007) -[2024-07-05 12:10:38,239][47609] Updated weights for policy 0, policy_version 20661 (0.0008) -[2024-07-05 12:10:39,899][47609] Updated weights for policy 0, policy_version 20671 (0.0008) -[2024-07-05 12:10:41,400][25826] Fps is (10 sec: 47514.0, 60 sec: 48196.2, 300 sec: 48180.0). Total num frames: 149397504. Throughput: 0: 12036.4. Samples: 12333912. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 12:10:41,401][25826] Avg episode reward: [(0, '46.254')] -[2024-07-05 12:10:41,659][47609] Updated weights for policy 0, policy_version 20681 (0.0008) -[2024-07-05 12:10:43,333][47609] Updated weights for policy 0, policy_version 20691 (0.0008) -[2024-07-05 12:10:45,057][47609] Updated weights for policy 0, policy_version 20701 (0.0008) -[2024-07-05 12:10:46,400][25826] Fps is (10 sec: 48333.5, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 149635072. Throughput: 0: 12038.4. Samples: 12406196. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 12:10:46,401][25826] Avg episode reward: [(0, '49.193')] -[2024-07-05 12:10:46,748][47609] Updated weights for policy 0, policy_version 20711 (0.0008) -[2024-07-05 12:10:48,476][47609] Updated weights for policy 0, policy_version 20721 (0.0009) -[2024-07-05 12:10:50,172][47609] Updated weights for policy 0, policy_version 20731 (0.0008) -[2024-07-05 12:10:51,400][25826] Fps is (10 sec: 48333.6, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 149880832. Throughput: 0: 12043.0. Samples: 12442168. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 12:10:51,401][25826] Avg episode reward: [(0, '49.029')] -[2024-07-05 12:10:51,887][47609] Updated weights for policy 0, policy_version 20741 (0.0010) -[2024-07-05 12:10:53,592][47609] Updated weights for policy 0, policy_version 20751 (0.0014) -[2024-07-05 12:10:54,141][47589] Stopping Batcher_0... -[2024-07-05 12:10:54,141][47589] Loop batcher_evt_loop terminating... -[2024-07-05 12:10:54,141][25826] Component Batcher_0 stopped! -[2024-07-05 12:10:54,143][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020754_150011904.pth... -[2024-07-05 12:10:54,180][47609] Weights refcount: 2 0 -[2024-07-05 12:10:54,186][47609] Stopping InferenceWorker_p0-w0... -[2024-07-05 12:10:54,187][47609] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 12:10:54,187][25826] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 12:10:54,207][47611] Stopping RolloutWorker_w3... -[2024-07-05 12:10:54,207][47634] Stopping RolloutWorker_w9... -[2024-07-05 12:10:54,207][47613] Stopping RolloutWorker_w2... -[2024-07-05 12:10:54,207][47638] Stopping RolloutWorker_w15... -[2024-07-05 12:10:54,207][25826] Component RolloutWorker_w9 stopped! -[2024-07-05 12:10:54,208][47611] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 12:10:54,208][47638] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 12:10:54,208][47613] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 12:10:54,208][47610] Stopping RolloutWorker_w0... -[2024-07-05 12:10:54,208][47615] Stopping RolloutWorker_w5... -[2024-07-05 12:10:54,208][47610] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 12:10:54,208][47612] Stopping RolloutWorker_w1... -[2024-07-05 12:10:54,208][25826] Component RolloutWorker_w3 stopped! -[2024-07-05 12:10:54,209][47612] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 12:10:54,209][47615] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 12:10:54,209][25826] Component RolloutWorker_w2 stopped! -[2024-07-05 12:10:54,209][47636] Stopping RolloutWorker_w13... -[2024-07-05 12:10:54,209][47640] Stopping RolloutWorker_w14... -[2024-07-05 12:10:54,210][25826] Component RolloutWorker_w15 stopped! -[2024-07-05 12:10:54,210][47640] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 12:10:54,210][47636] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 12:10:54,210][47616] Stopping RolloutWorker_w7... -[2024-07-05 12:10:54,210][47639] Stopping RolloutWorker_w12... -[2024-07-05 12:10:54,210][47618] Stopping RolloutWorker_w6... -[2024-07-05 12:10:54,210][47635] Stopping RolloutWorker_w11... -[2024-07-05 12:10:54,211][47616] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 12:10:54,211][47639] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 12:10:54,211][47618] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 12:10:54,210][25826] Component RolloutWorker_w0 stopped! -[2024-07-05 12:10:54,211][47635] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 12:10:54,211][47614] Stopping RolloutWorker_w4... -[2024-07-05 12:10:54,212][47614] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 12:10:54,211][25826] Component RolloutWorker_w5 stopped! -[2024-07-05 12:10:54,212][25826] Component RolloutWorker_w1 stopped! -[2024-07-05 12:10:54,213][25826] Component RolloutWorker_w13 stopped! -[2024-07-05 12:10:54,214][25826] Component RolloutWorker_w14 stopped! -[2024-07-05 12:10:54,214][25826] Component RolloutWorker_w7 stopped! -[2024-07-05 12:10:54,215][25826] Component RolloutWorker_w12 stopped! -[2024-07-05 12:10:54,216][47617] Stopping RolloutWorker_w8... -[2024-07-05 12:10:54,216][25826] Component RolloutWorker_w6 stopped! -[2024-07-05 12:10:54,216][47617] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 12:10:54,216][25826] Component RolloutWorker_w11 stopped! -[2024-07-05 12:10:54,217][25826] Component RolloutWorker_w4 stopped! -[2024-07-05 12:10:54,217][25826] Component RolloutWorker_w8 stopped! -[2024-07-05 12:10:54,218][47637] Stopping RolloutWorker_w10... -[2024-07-05 12:10:54,219][47637] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 12:10:54,218][25826] Component RolloutWorker_w10 stopped! -[2024-07-05 12:10:54,218][47634] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 12:10:54,231][47589] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000019386_138805248.pth -[2024-07-05 12:10:54,234][47589] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020754_150011904.pth... -[2024-07-05 12:10:54,344][47589] Stopping LearnerWorker_p0... -[2024-07-05 12:10:54,345][47589] Loop learner_proc0_evt_loop terminating... -[2024-07-05 12:10:54,344][25826] Component LearnerWorker_p0 stopped! -[2024-07-05 12:10:54,346][25826] Waiting for process learner_proc0 to stop... -[2024-07-05 12:10:55,746][25826] Waiting for process inference_proc0-0 to join... -[2024-07-05 12:10:55,747][25826] Waiting for process rollout_proc0 to join... -[2024-07-05 12:10:55,748][25826] Waiting for process rollout_proc1 to join... -[2024-07-05 12:10:55,748][25826] Waiting for process rollout_proc2 to join... -[2024-07-05 12:10:55,749][25826] Waiting for process rollout_proc3 to join... -[2024-07-05 12:10:55,749][25826] Waiting for process rollout_proc4 to join... -[2024-07-05 12:10:55,749][25826] Waiting for process rollout_proc5 to join... -[2024-07-05 12:10:55,749][25826] Waiting for process rollout_proc6 to join... -[2024-07-05 12:10:55,750][25826] Waiting for process rollout_proc7 to join... -[2024-07-05 12:10:55,750][25826] Waiting for process rollout_proc8 to join... -[2024-07-05 12:10:55,750][25826] Waiting for process rollout_proc9 to join... -[2024-07-05 12:10:55,751][25826] Waiting for process rollout_proc10 to join... -[2024-07-05 12:10:55,751][25826] Waiting for process rollout_proc11 to join... -[2024-07-05 12:10:55,751][25826] Waiting for process rollout_proc12 to join... -[2024-07-05 12:10:55,752][25826] Waiting for process rollout_proc13 to join... -[2024-07-05 12:10:55,752][25826] Waiting for process rollout_proc14 to join... -[2024-07-05 12:10:55,752][25826] Waiting for process rollout_proc15 to join... -[2024-07-05 12:10:55,752][25826] Batcher 0 profile tree view: -batching: 72.9549, releasing_batches: 0.1495 -[2024-07-05 12:10:55,753][25826] InferenceWorker_p0-w0 profile tree view: +[2024-07-05 00:02:25,003][45720] Using optimizer +[2024-07-05 00:02:25,507][45720] No checkpoints found +[2024-07-05 00:02:25,507][45720] Did not load from checkpoint, starting from scratch! +[2024-07-05 00:02:25,507][45720] Initialized policy 0 weights for model version 0 +[2024-07-05 00:02:25,508][45720] LearnerWorker_p0 finished initialization! +[2024-07-05 00:02:25,509][45720] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 00:02:25,563][45457] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 00:02:25,580][45733] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 00:02:25,582][45733] RunningMeanStd input shape: (1,) +[2024-07-05 00:02:25,589][45733] Num input channels: 3 +[2024-07-05 00:02:25,600][45733] Convolutional layer output size: 4608 +[2024-07-05 00:02:25,611][45733] Policy head output size: 512 +[2024-07-05 00:02:25,735][45457] Inference worker 0-0 is ready! +[2024-07-05 00:02:25,736][45457] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 00:02:25,765][45741] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,765][45739] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,766][45737] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,766][45735] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,766][45736] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,766][45734] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,766][45740] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:25,767][45738] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:02:26,263][45736] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,264][45735] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,264][45741] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,264][45737] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,264][45734] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,265][45739] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,420][45735] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,421][45737] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,422][45734] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,422][45741] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,422][45739] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,467][45738] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,615][45740] Decorrelating experience for 0 frames... +[2024-07-05 00:02:26,626][45735] Decorrelating experience for 64 frames... +[2024-07-05 00:02:26,628][45737] Decorrelating experience for 64 frames... +[2024-07-05 00:02:26,629][45739] Decorrelating experience for 64 frames... +[2024-07-05 00:02:26,629][45734] Decorrelating experience for 64 frames... +[2024-07-05 00:02:26,677][45736] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,781][45738] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,811][45735] Decorrelating experience for 96 frames... +[2024-07-05 00:02:26,811][45737] Decorrelating experience for 96 frames... +[2024-07-05 00:02:26,812][45734] Decorrelating experience for 96 frames... +[2024-07-05 00:02:26,837][45740] Decorrelating experience for 32 frames... +[2024-07-05 00:02:26,845][45741] Decorrelating experience for 64 frames... +[2024-07-05 00:02:26,887][45736] Decorrelating experience for 64 frames... +[2024-07-05 00:02:26,969][45739] Decorrelating experience for 96 frames... +[2024-07-05 00:02:27,025][45741] Decorrelating experience for 96 frames... +[2024-07-05 00:02:27,032][45740] Decorrelating experience for 64 frames... +[2024-07-05 00:02:27,069][45738] Decorrelating experience for 64 frames... +[2024-07-05 00:02:27,161][45736] Decorrelating experience for 96 frames... +[2024-07-05 00:02:27,206][45740] Decorrelating experience for 96 frames... +[2024-07-05 00:02:27,349][45738] Decorrelating experience for 96 frames... +[2024-07-05 00:02:27,930][45720] Signal inference workers to stop experience collection... +[2024-07-05 00:02:27,935][45733] InferenceWorker_p0-w0: stopping experience collection +[2024-07-05 00:02:30,563][45457] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 450.8. Samples: 2254. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 00:02:30,564][45457] Avg episode reward: [(0, '1.905')] +[2024-07-05 00:02:31,327][45720] Signal inference workers to resume experience collection... +[2024-07-05 00:02:31,327][45733] InferenceWorker_p0-w0: resuming experience collection +[2024-07-05 00:02:35,563][45457] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3686.4). Total num frames: 36864. Throughput: 0: 877.6. Samples: 8776. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 00:02:35,564][45457] Avg episode reward: [(0, '4.019')] +[2024-07-05 00:02:35,777][45733] Updated weights for policy 0, policy_version 10 (0.0100) +[2024-07-05 00:02:40,408][45733] Updated weights for policy 0, policy_version 20 (0.0012) +[2024-07-05 00:02:40,563][45457] Fps is (10 sec: 8192.0, 60 sec: 5461.3, 300 sec: 5461.3). Total num frames: 81920. Throughput: 0: 1042.0. Samples: 15630. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:02:40,564][45457] Avg episode reward: [(0, '4.720')] +[2024-07-05 00:02:40,909][45457] Heartbeat connected on Batcher_0 +[2024-07-05 00:02:40,912][45457] Heartbeat connected on LearnerWorker_p0 +[2024-07-05 00:02:40,921][45457] Heartbeat connected on InferenceWorker_p0-w0 +[2024-07-05 00:02:40,922][45457] Heartbeat connected on RolloutWorker_w0 +[2024-07-05 00:02:40,925][45457] Heartbeat connected on RolloutWorker_w1 +[2024-07-05 00:02:40,928][45457] Heartbeat connected on RolloutWorker_w2 +[2024-07-05 00:02:40,931][45457] Heartbeat connected on RolloutWorker_w3 +[2024-07-05 00:02:40,935][45457] Heartbeat connected on RolloutWorker_w4 +[2024-07-05 00:02:40,938][45457] Heartbeat connected on RolloutWorker_w5 +[2024-07-05 00:02:40,942][45457] Heartbeat connected on RolloutWorker_w6 +[2024-07-05 00:02:40,946][45457] Heartbeat connected on RolloutWorker_w7 +[2024-07-05 00:02:44,979][45733] Updated weights for policy 0, policy_version 30 (0.0011) +[2024-07-05 00:02:45,563][45457] Fps is (10 sec: 9011.1, 60 sec: 6348.8, 300 sec: 6348.8). Total num frames: 126976. Throughput: 0: 1445.6. Samples: 28912. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 00:02:45,564][45457] Avg episode reward: [(0, '5.009')] +[2024-07-05 00:02:45,905][45720] Saving new best policy, reward=5.009! +[2024-07-05 00:02:49,704][45733] Updated weights for policy 0, policy_version 40 (0.0011) +[2024-07-05 00:02:50,562][45457] Fps is (10 sec: 8601.8, 60 sec: 6717.5, 300 sec: 6717.5). Total num frames: 167936. Throughput: 0: 1678.5. Samples: 41962. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:02:50,563][45457] Avg episode reward: [(0, '5.006')] +[2024-07-05 00:02:54,399][45733] Updated weights for policy 0, policy_version 50 (0.0012) +[2024-07-05 00:02:55,563][45457] Fps is (10 sec: 8601.7, 60 sec: 7099.7, 300 sec: 7099.7). Total num frames: 212992. Throughput: 0: 1619.1. Samples: 48574. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:02:55,564][45457] Avg episode reward: [(0, '5.173')] +[2024-07-05 00:02:55,774][45720] Saving new best policy, reward=5.173! +[2024-07-05 00:02:59,032][45733] Updated weights for policy 0, policy_version 60 (0.0012) +[2024-07-05 00:03:00,563][45457] Fps is (10 sec: 9011.1, 60 sec: 7372.8, 300 sec: 7372.8). Total num frames: 258048. Throughput: 0: 1765.3. Samples: 61786. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:03:00,564][45457] Avg episode reward: [(0, '5.099')] +[2024-07-05 00:03:03,738][45733] Updated weights for policy 0, policy_version 70 (0.0012) +[2024-07-05 00:03:05,563][45457] Fps is (10 sec: 8601.5, 60 sec: 7475.2, 300 sec: 7475.2). Total num frames: 299008. Throughput: 0: 1872.4. Samples: 74898. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:03:05,564][45457] Avg episode reward: [(0, '5.730')] +[2024-07-05 00:03:05,635][45720] Saving new best policy, reward=5.730! +[2024-07-05 00:03:08,546][45733] Updated weights for policy 0, policy_version 80 (0.0011) +[2024-07-05 00:03:10,563][45457] Fps is (10 sec: 8601.6, 60 sec: 7645.9, 300 sec: 7645.9). Total num frames: 344064. Throughput: 0: 1805.3. Samples: 81240. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:03:10,563][45457] Avg episode reward: [(0, '5.998')] +[2024-07-05 00:03:10,918][45720] Saving new best policy, reward=5.998! +[2024-07-05 00:03:13,273][45733] Updated weights for policy 0, policy_version 90 (0.0011) +[2024-07-05 00:03:15,562][45457] Fps is (10 sec: 8601.8, 60 sec: 7700.5, 300 sec: 7700.5). Total num frames: 385024. Throughput: 0: 2040.5. Samples: 94076. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:03:15,563][45457] Avg episode reward: [(0, '6.024')] +[2024-07-05 00:03:15,667][45720] Saving new best policy, reward=6.024! +[2024-07-05 00:03:18,069][45733] Updated weights for policy 0, policy_version 100 (0.0011) +[2024-07-05 00:03:20,563][45457] Fps is (10 sec: 8601.6, 60 sec: 7819.6, 300 sec: 7819.6). Total num frames: 430080. Throughput: 0: 2184.6. Samples: 107082. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:03:20,564][45457] Avg episode reward: [(0, '6.507')] +[2024-07-05 00:03:20,918][45720] Saving new best policy, reward=6.507! +[2024-07-05 00:03:22,877][45733] Updated weights for policy 0, policy_version 110 (0.0011) +[2024-07-05 00:03:25,562][45457] Fps is (10 sec: 8601.5, 60 sec: 7850.7, 300 sec: 7850.7). Total num frames: 471040. Throughput: 0: 2171.5. Samples: 113346. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0) +[2024-07-05 00:03:25,563][45457] Avg episode reward: [(0, '5.608')] +[2024-07-05 00:03:27,631][45733] Updated weights for policy 0, policy_version 120 (0.0011) +[2024-07-05 00:03:30,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8601.6, 300 sec: 7939.9). Total num frames: 516096. Throughput: 0: 2167.2. Samples: 126438. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:03:30,564][45457] Avg episode reward: [(0, '5.695')] +[2024-07-05 00:03:32,342][45733] Updated weights for policy 0, policy_version 130 (0.0011) +[2024-07-05 00:03:35,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8669.9, 300 sec: 7957.9). Total num frames: 557056. Throughput: 0: 2166.1. Samples: 139436. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:03:35,564][45457] Avg episode reward: [(0, '7.029')] +[2024-07-05 00:03:35,633][45720] Saving new best policy, reward=7.029! +[2024-07-05 00:03:37,106][45733] Updated weights for policy 0, policy_version 140 (0.0011) +[2024-07-05 00:03:40,563][45457] Fps is (10 sec: 8601.7, 60 sec: 8669.9, 300 sec: 8028.2). Total num frames: 602112. Throughput: 0: 2160.7. Samples: 145806. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:03:40,563][45457] Avg episode reward: [(0, '7.417')] +[2024-07-05 00:03:40,889][45720] Saving new best policy, reward=7.417! +[2024-07-05 00:03:41,840][45733] Updated weights for policy 0, policy_version 150 (0.0012) +[2024-07-05 00:03:45,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8601.6, 300 sec: 8038.4). Total num frames: 643072. Throughput: 0: 2153.0. Samples: 158670. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:03:45,563][45457] Avg episode reward: [(0, '7.178')] +[2024-07-05 00:03:46,612][45733] Updated weights for policy 0, policy_version 160 (0.0011) +[2024-07-05 00:03:50,563][45457] Fps is (10 sec: 8601.0, 60 sec: 8669.8, 300 sec: 8095.6). Total num frames: 688128. Throughput: 0: 2151.0. Samples: 171696. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:03:50,568][45457] Avg episode reward: [(0, '6.736')] +[2024-07-05 00:03:51,379][45733] Updated weights for policy 0, policy_version 170 (0.0012) +[2024-07-05 00:03:55,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8601.6, 300 sec: 8101.0). Total num frames: 729088. Throughput: 0: 2151.1. Samples: 178040. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:03:55,563][45457] Avg episode reward: [(0, '8.034')] +[2024-07-05 00:03:55,657][45720] Saving new best policy, reward=8.034! +[2024-07-05 00:03:56,147][45733] Updated weights for policy 0, policy_version 180 (0.0012) +[2024-07-05 00:04:00,564][45457] Fps is (10 sec: 8601.3, 60 sec: 8601.5, 300 sec: 8148.8). Total num frames: 774144. Throughput: 0: 2152.6. Samples: 190946. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:04:00,566][45457] Avg episode reward: [(0, '8.884')] +[2024-07-05 00:04:00,938][45720] Saving new best policy, reward=8.884! +[2024-07-05 00:04:00,939][45733] Updated weights for policy 0, policy_version 190 (0.0012) +[2024-07-05 00:04:05,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8601.6, 300 sec: 8151.0). Total num frames: 815104. Throughput: 0: 2147.5. Samples: 203718. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:04:05,564][45457] Avg episode reward: [(0, '9.313')] +[2024-07-05 00:04:05,678][45720] Saving new best policy, reward=9.313! +[2024-07-05 00:04:05,681][45733] Updated weights for policy 0, policy_version 200 (0.0011) +[2024-07-05 00:04:10,537][45733] Updated weights for policy 0, policy_version 210 (0.0011) +[2024-07-05 00:04:10,562][45457] Fps is (10 sec: 8602.6, 60 sec: 8601.6, 300 sec: 8192.0). Total num frames: 860160. Throughput: 0: 2155.6. Samples: 210350. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:04:10,564][45457] Avg episode reward: [(0, '9.937')] +[2024-07-05 00:04:10,565][45720] Saving new best policy, reward=9.937! +[2024-07-05 00:04:15,465][45733] Updated weights for policy 0, policy_version 220 (0.0012) +[2024-07-05 00:04:15,564][45457] Fps is (10 sec: 8600.8, 60 sec: 8601.4, 300 sec: 8191.9). Total num frames: 901120. Throughput: 0: 2141.2. Samples: 222794. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:04:15,568][45457] Avg episode reward: [(0, '12.334')] +[2024-07-05 00:04:15,961][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000221_905216.pth... +[2024-07-05 00:04:16,068][45720] Saving new best policy, reward=12.334! +[2024-07-05 00:04:20,434][45733] Updated weights for policy 0, policy_version 230 (0.0012) +[2024-07-05 00:04:20,563][45457] Fps is (10 sec: 8191.9, 60 sec: 8533.3, 300 sec: 8192.0). Total num frames: 942080. Throughput: 0: 2128.4. Samples: 235214. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:04:20,563][45457] Avg episode reward: [(0, '13.733')] +[2024-07-05 00:04:20,914][45720] Saving new best policy, reward=13.733! +[2024-07-05 00:04:25,324][45733] Updated weights for policy 0, policy_version 240 (0.0011) +[2024-07-05 00:04:25,562][45457] Fps is (10 sec: 8192.9, 60 sec: 8533.3, 300 sec: 8192.0). Total num frames: 983040. Throughput: 0: 2123.8. Samples: 241376. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:04:25,563][45457] Avg episode reward: [(0, '11.842')] +[2024-07-05 00:04:30,088][45733] Updated weights for policy 0, policy_version 250 (0.0011) +[2024-07-05 00:04:30,562][45457] Fps is (10 sec: 8192.1, 60 sec: 8465.1, 300 sec: 8192.0). Total num frames: 1024000. Throughput: 0: 2126.3. Samples: 254352. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:04:30,564][45457] Avg episode reward: [(0, '12.738')] +[2024-07-05 00:04:34,873][45733] Updated weights for policy 0, policy_version 260 (0.0012) +[2024-07-05 00:04:35,564][45457] Fps is (10 sec: 8600.8, 60 sec: 8533.2, 300 sec: 8223.4). Total num frames: 1069056. Throughput: 0: 2120.1. Samples: 267102. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:04:35,565][45457] Avg episode reward: [(0, '13.469')] +[2024-07-05 00:04:39,695][45733] Updated weights for policy 0, policy_version 270 (0.0012) +[2024-07-05 00:04:40,562][45457] Fps is (10 sec: 8601.6, 60 sec: 8465.1, 300 sec: 8222.3). Total num frames: 1110016. Throughput: 0: 2119.2. Samples: 273404. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:04:40,563][45457] Avg episode reward: [(0, '12.910')] +[2024-07-05 00:04:44,483][45733] Updated weights for policy 0, policy_version 280 (0.0011) +[2024-07-05 00:04:45,563][45457] Fps is (10 sec: 8601.9, 60 sec: 8533.2, 300 sec: 8250.5). Total num frames: 1155072. Throughput: 0: 2119.9. Samples: 286340. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:04:45,568][45457] Avg episode reward: [(0, '14.045')] +[2024-07-05 00:04:45,920][45720] Saving new best policy, reward=14.045! +[2024-07-05 00:04:49,293][45733] Updated weights for policy 0, policy_version 290 (0.0011) +[2024-07-05 00:04:50,563][45457] Fps is (10 sec: 8600.9, 60 sec: 8465.1, 300 sec: 8248.5). Total num frames: 1196032. Throughput: 0: 2117.8. Samples: 299020. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:04:50,565][45457] Avg episode reward: [(0, '14.268')] +[2024-07-05 00:04:50,720][45720] Saving new best policy, reward=14.268! +[2024-07-05 00:04:54,165][45733] Updated weights for policy 0, policy_version 300 (0.0011) +[2024-07-05 00:04:55,562][45457] Fps is (10 sec: 8192.6, 60 sec: 8465.1, 300 sec: 8246.6). Total num frames: 1236992. Throughput: 0: 2107.6. Samples: 305194. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:04:55,563][45457] Avg episode reward: [(0, '13.382')] +[2024-07-05 00:04:58,942][45733] Updated weights for policy 0, policy_version 310 (0.0011) +[2024-07-05 00:05:00,563][45457] Fps is (10 sec: 8602.2, 60 sec: 8465.2, 300 sec: 8271.3). Total num frames: 1282048. Throughput: 0: 2121.2. Samples: 318246. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:05:00,564][45457] Avg episode reward: [(0, '15.909')] +[2024-07-05 00:05:00,858][45720] Saving new best policy, reward=15.909! +[2024-07-05 00:05:03,823][45733] Updated weights for policy 0, policy_version 320 (0.0011) +[2024-07-05 00:05:05,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8465.1, 300 sec: 8268.8). Total num frames: 1323008. Throughput: 0: 2123.6. Samples: 330776. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:05:05,564][45457] Avg episode reward: [(0, '17.249')] +[2024-07-05 00:05:05,723][45720] Saving new best policy, reward=17.249! +[2024-07-05 00:05:08,668][45733] Updated weights for policy 0, policy_version 330 (0.0011) +[2024-07-05 00:05:10,563][45457] Fps is (10 sec: 8192.1, 60 sec: 8396.8, 300 sec: 8266.5). Total num frames: 1363968. Throughput: 0: 2127.2. Samples: 337102. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:10,564][45457] Avg episode reward: [(0, '18.132')] +[2024-07-05 00:05:10,565][45720] Saving new best policy, reward=18.132! +[2024-07-05 00:05:13,522][45733] Updated weights for policy 0, policy_version 340 (0.0012) +[2024-07-05 00:05:15,563][45457] Fps is (10 sec: 8601.0, 60 sec: 8465.1, 300 sec: 8288.3). Total num frames: 1409024. Throughput: 0: 2122.3. Samples: 349856. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:15,565][45457] Avg episode reward: [(0, '18.656')] +[2024-07-05 00:05:15,908][45720] Saving new best policy, reward=18.656! +[2024-07-05 00:05:18,371][45733] Updated weights for policy 0, policy_version 350 (0.0011) +[2024-07-05 00:05:20,563][45457] Fps is (10 sec: 8601.2, 60 sec: 8465.0, 300 sec: 8285.6). Total num frames: 1449984. Throughput: 0: 2118.2. Samples: 362420. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:20,564][45457] Avg episode reward: [(0, '20.924')] +[2024-07-05 00:05:20,800][45720] Saving new best policy, reward=20.924! +[2024-07-05 00:05:23,208][45733] Updated weights for policy 0, policy_version 360 (0.0011) +[2024-07-05 00:05:25,563][45457] Fps is (10 sec: 8192.6, 60 sec: 8465.0, 300 sec: 8283.0). Total num frames: 1490944. Throughput: 0: 2119.2. Samples: 368770. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:25,564][45457] Avg episode reward: [(0, '19.165')] +[2024-07-05 00:05:28,046][45733] Updated weights for policy 0, policy_version 370 (0.0011) +[2024-07-05 00:05:30,564][45457] Fps is (10 sec: 8601.2, 60 sec: 8533.2, 300 sec: 8302.7). Total num frames: 1536000. Throughput: 0: 2116.9. Samples: 381602. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:30,565][45457] Avg episode reward: [(0, '17.827')] +[2024-07-05 00:05:32,898][45733] Updated weights for policy 0, policy_version 380 (0.0011) +[2024-07-05 00:05:35,564][45457] Fps is (10 sec: 8600.9, 60 sec: 8465.1, 300 sec: 8299.8). Total num frames: 1576960. Throughput: 0: 2113.9. Samples: 394146. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:35,566][45457] Avg episode reward: [(0, '19.011')] +[2024-07-05 00:05:37,689][45733] Updated weights for policy 0, policy_version 390 (0.0011) +[2024-07-05 00:05:40,563][45457] Fps is (10 sec: 8192.7, 60 sec: 8465.0, 300 sec: 8297.0). Total num frames: 1617920. Throughput: 0: 2121.9. Samples: 400682. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:05:40,564][45457] Avg episode reward: [(0, '18.876')] +[2024-07-05 00:05:42,506][45733] Updated weights for policy 0, policy_version 400 (0.0011) +[2024-07-05 00:05:45,563][45457] Fps is (10 sec: 8602.3, 60 sec: 8465.1, 300 sec: 8314.9). Total num frames: 1662976. Throughput: 0: 2113.2. Samples: 413338. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:05:45,564][45457] Avg episode reward: [(0, '19.716')] +[2024-07-05 00:05:47,220][45733] Updated weights for policy 0, policy_version 410 (0.0013) +[2024-07-05 00:05:50,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8465.2, 300 sec: 8311.9). Total num frames: 1703936. Throughput: 0: 2124.6. Samples: 426382. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:05:50,563][45457] Avg episode reward: [(0, '19.487')] +[2024-07-05 00:05:52,035][45733] Updated weights for policy 0, policy_version 420 (0.0013) +[2024-07-05 00:05:55,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8328.5). Total num frames: 1748992. Throughput: 0: 2124.3. Samples: 432694. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:05:55,564][45457] Avg episode reward: [(0, '19.981')] +[2024-07-05 00:05:56,869][45733] Updated weights for policy 0, policy_version 430 (0.0012) +[2024-07-05 00:06:00,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8465.1, 300 sec: 8325.4). Total num frames: 1789952. Throughput: 0: 2120.3. Samples: 445266. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:00,564][45457] Avg episode reward: [(0, '20.921')] +[2024-07-05 00:06:01,749][45733] Updated weights for policy 0, policy_version 440 (0.0011) +[2024-07-05 00:06:05,563][45457] Fps is (10 sec: 8192.0, 60 sec: 8465.1, 300 sec: 8322.3). Total num frames: 1830912. Throughput: 0: 2125.8. Samples: 458082. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:05,564][45457] Avg episode reward: [(0, '23.653')] +[2024-07-05 00:06:05,637][45720] Saving new best policy, reward=23.653! +[2024-07-05 00:06:06,627][45733] Updated weights for policy 0, policy_version 450 (0.0011) +[2024-07-05 00:06:10,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8533.3, 300 sec: 8337.6). Total num frames: 1875968. Throughput: 0: 2123.7. Samples: 464334. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:10,563][45457] Avg episode reward: [(0, '22.495')] +[2024-07-05 00:06:11,493][45733] Updated weights for policy 0, policy_version 460 (0.0011) +[2024-07-05 00:06:15,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8465.2, 300 sec: 8334.5). Total num frames: 1916928. Throughput: 0: 2117.2. Samples: 476874. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:15,564][45457] Avg episode reward: [(0, '21.926')] +[2024-07-05 00:06:15,820][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000469_1921024.pth... +[2024-07-05 00:06:16,341][45733] Updated weights for policy 0, policy_version 470 (0.0011) +[2024-07-05 00:06:20,564][45457] Fps is (10 sec: 8191.2, 60 sec: 8465.0, 300 sec: 8331.4). Total num frames: 1957888. Throughput: 0: 2119.5. Samples: 489522. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:20,565][45457] Avg episode reward: [(0, '21.250')] +[2024-07-05 00:06:21,184][45733] Updated weights for policy 0, policy_version 480 (0.0011) +[2024-07-05 00:06:25,564][45457] Fps is (10 sec: 8600.8, 60 sec: 8533.2, 300 sec: 8345.6). Total num frames: 2002944. Throughput: 0: 2118.2. Samples: 496004. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:25,567][45457] Avg episode reward: [(0, '23.038')] +[2024-07-05 00:06:26,002][45733] Updated weights for policy 0, policy_version 490 (0.0011) +[2024-07-05 00:06:30,563][45457] Fps is (10 sec: 8602.4, 60 sec: 8465.2, 300 sec: 8342.5). Total num frames: 2043904. Throughput: 0: 2117.6. Samples: 508628. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:30,563][45457] Avg episode reward: [(0, '23.902')] +[2024-07-05 00:06:30,868][45720] Saving new best policy, reward=23.902! +[2024-07-05 00:06:30,870][45733] Updated weights for policy 0, policy_version 500 (0.0011) +[2024-07-05 00:06:35,563][45457] Fps is (10 sec: 8192.9, 60 sec: 8465.2, 300 sec: 8339.5). Total num frames: 2084864. Throughput: 0: 2103.4. Samples: 521034. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:35,563][45457] Avg episode reward: [(0, '24.194')] +[2024-07-05 00:06:35,771][45720] Saving new best policy, reward=24.194! +[2024-07-05 00:06:35,772][45733] Updated weights for policy 0, policy_version 510 (0.0011) +[2024-07-05 00:06:40,563][45457] Fps is (10 sec: 8191.9, 60 sec: 8465.1, 300 sec: 8336.6). Total num frames: 2125824. Throughput: 0: 2096.3. Samples: 527028. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:40,564][45457] Avg episode reward: [(0, '23.401')] +[2024-07-05 00:06:40,840][45733] Updated weights for policy 0, policy_version 520 (0.0012) +[2024-07-05 00:06:45,563][45457] Fps is (10 sec: 8192.0, 60 sec: 8396.8, 300 sec: 8333.8). Total num frames: 2166784. Throughput: 0: 2094.9. Samples: 539538. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:45,563][45457] Avg episode reward: [(0, '24.109')] +[2024-07-05 00:06:45,664][45733] Updated weights for policy 0, policy_version 530 (0.0011) +[2024-07-05 00:06:50,563][45457] Fps is (10 sec: 8192.1, 60 sec: 8396.8, 300 sec: 8331.1). Total num frames: 2207744. Throughput: 0: 2092.8. Samples: 552258. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:50,564][45457] Avg episode reward: [(0, '25.860')] +[2024-07-05 00:06:50,624][45720] Saving new best policy, reward=25.860! +[2024-07-05 00:06:50,627][45733] Updated weights for policy 0, policy_version 540 (0.0012) +[2024-07-05 00:06:55,562][45457] Fps is (10 sec: 8192.0, 60 sec: 8328.6, 300 sec: 8328.5). Total num frames: 2248704. Throughput: 0: 2092.0. Samples: 558472. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:06:55,563][45457] Avg episode reward: [(0, '24.812')] +[2024-07-05 00:06:55,568][45733] Updated weights for policy 0, policy_version 550 (0.0012) +[2024-07-05 00:07:00,430][45733] Updated weights for policy 0, policy_version 560 (0.0011) +[2024-07-05 00:07:00,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8396.8, 300 sec: 8340.9). Total num frames: 2293760. Throughput: 0: 2090.4. Samples: 570942. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:00,563][45457] Avg episode reward: [(0, '25.045')] +[2024-07-05 00:07:05,264][45733] Updated weights for policy 0, policy_version 570 (0.0011) +[2024-07-05 00:07:05,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8396.8, 300 sec: 8338.3). Total num frames: 2334720. Throughput: 0: 2088.2. Samples: 583488. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:05,564][45457] Avg episode reward: [(0, '24.328')] +[2024-07-05 00:07:10,148][45733] Updated weights for policy 0, policy_version 580 (0.0011) +[2024-07-05 00:07:10,563][45457] Fps is (10 sec: 8192.0, 60 sec: 8328.5, 300 sec: 8335.7). Total num frames: 2375680. Throughput: 0: 2085.9. Samples: 589866. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:10,564][45457] Avg episode reward: [(0, '24.134')] +[2024-07-05 00:07:14,973][45733] Updated weights for policy 0, policy_version 590 (0.0012) +[2024-07-05 00:07:15,564][45457] Fps is (10 sec: 8600.8, 60 sec: 8396.7, 300 sec: 8347.3). Total num frames: 2420736. Throughput: 0: 2090.0. Samples: 602678. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:15,566][45457] Avg episode reward: [(0, '24.807')] +[2024-07-05 00:07:19,803][45733] Updated weights for policy 0, policy_version 600 (0.0012) +[2024-07-05 00:07:20,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8396.9, 300 sec: 8344.7). Total num frames: 2461696. Throughput: 0: 2093.0. Samples: 615220. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:20,564][45457] Avg episode reward: [(0, '23.460')] +[2024-07-05 00:07:24,650][45733] Updated weights for policy 0, policy_version 610 (0.0012) +[2024-07-05 00:07:25,562][45457] Fps is (10 sec: 8192.8, 60 sec: 8328.7, 300 sec: 8483.6). Total num frames: 2502656. Throughput: 0: 2099.9. Samples: 621524. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:25,563][45457] Avg episode reward: [(0, '23.811')] +[2024-07-05 00:07:29,488][45733] Updated weights for policy 0, policy_version 620 (0.0012) +[2024-07-05 00:07:30,562][45457] Fps is (10 sec: 8601.6, 60 sec: 8396.8, 300 sec: 8511.3). Total num frames: 2547712. Throughput: 0: 2107.1. Samples: 634356. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:30,563][45457] Avg episode reward: [(0, '23.031')] +[2024-07-05 00:07:34,368][45733] Updated weights for policy 0, policy_version 630 (0.0011) +[2024-07-05 00:07:35,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8396.8, 300 sec: 8497.5). Total num frames: 2588672. Throughput: 0: 2103.0. Samples: 646892. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:35,563][45457] Avg episode reward: [(0, '24.536')] +[2024-07-05 00:07:39,183][45733] Updated weights for policy 0, policy_version 640 (0.0012) +[2024-07-05 00:07:40,562][45457] Fps is (10 sec: 8192.0, 60 sec: 8396.8, 300 sec: 8483.6). Total num frames: 2629632. Throughput: 0: 2106.5. Samples: 653264. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:40,563][45457] Avg episode reward: [(0, '26.081')] +[2024-07-05 00:07:40,612][45720] Saving new best policy, reward=26.081! +[2024-07-05 00:07:44,021][45733] Updated weights for policy 0, policy_version 650 (0.0012) +[2024-07-05 00:07:45,564][45457] Fps is (10 sec: 8600.5, 60 sec: 8464.9, 300 sec: 8497.4). Total num frames: 2674688. Throughput: 0: 2113.8. Samples: 666066. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:45,567][45457] Avg episode reward: [(0, '25.760')] +[2024-07-05 00:07:48,838][45733] Updated weights for policy 0, policy_version 660 (0.0011) +[2024-07-05 00:07:50,562][45457] Fps is (10 sec: 8601.6, 60 sec: 8465.1, 300 sec: 8483.6). Total num frames: 2715648. Throughput: 0: 2115.0. Samples: 678662. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:50,563][45457] Avg episode reward: [(0, '23.094')] +[2024-07-05 00:07:53,710][45733] Updated weights for policy 0, policy_version 670 (0.0012) +[2024-07-05 00:07:55,563][45457] Fps is (10 sec: 8193.0, 60 sec: 8465.1, 300 sec: 8469.7). Total num frames: 2756608. Throughput: 0: 2114.3. Samples: 685008. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:07:55,564][45457] Avg episode reward: [(0, '23.225')] +[2024-07-05 00:07:58,542][45733] Updated weights for policy 0, policy_version 680 (0.0012) +[2024-07-05 00:08:00,563][45457] Fps is (10 sec: 8600.9, 60 sec: 8465.0, 300 sec: 8483.6). Total num frames: 2801664. Throughput: 0: 2114.7. Samples: 697840. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:00,566][45457] Avg episode reward: [(0, '22.793')] +[2024-07-05 00:08:03,308][45733] Updated weights for policy 0, policy_version 690 (0.0012) +[2024-07-05 00:08:05,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8465.1, 300 sec: 8469.7). Total num frames: 2842624. Throughput: 0: 2117.8. Samples: 710522. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:05,564][45457] Avg episode reward: [(0, '25.394')] +[2024-07-05 00:08:08,057][45733] Updated weights for policy 0, policy_version 700 (0.0012) +[2024-07-05 00:08:10,563][45457] Fps is (10 sec: 8602.2, 60 sec: 8533.3, 300 sec: 8483.6). Total num frames: 2887680. Throughput: 0: 2126.8. Samples: 717228. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:10,563][45457] Avg episode reward: [(0, '24.393')] +[2024-07-05 00:08:12,825][45733] Updated weights for policy 0, policy_version 710 (0.0012) +[2024-07-05 00:08:15,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8465.2, 300 sec: 8469.7). Total num frames: 2928640. Throughput: 0: 2124.8. Samples: 729972. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:15,564][45457] Avg episode reward: [(0, '24.082')] +[2024-07-05 00:08:15,720][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000716_2932736.pth... +[2024-07-05 00:08:15,806][45720] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000221_905216.pth +[2024-07-05 00:08:17,693][45733] Updated weights for policy 0, policy_version 720 (0.0012) +[2024-07-05 00:08:20,562][45457] Fps is (10 sec: 8192.0, 60 sec: 8465.1, 300 sec: 8469.7). Total num frames: 2969600. Throughput: 0: 2131.3. Samples: 742800. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:20,563][45457] Avg episode reward: [(0, '25.008')] +[2024-07-05 00:08:22,512][45733] Updated weights for policy 0, policy_version 730 (0.0011) +[2024-07-05 00:08:25,563][45457] Fps is (10 sec: 8601.7, 60 sec: 8533.3, 300 sec: 8469.7). Total num frames: 3014656. Throughput: 0: 2129.5. Samples: 749090. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:25,564][45457] Avg episode reward: [(0, '24.908')] +[2024-07-05 00:08:27,306][45733] Updated weights for policy 0, policy_version 740 (0.0012) +[2024-07-05 00:08:30,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8465.1, 300 sec: 8469.7). Total num frames: 3055616. Throughput: 0: 2126.9. Samples: 761774. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:30,564][45457] Avg episode reward: [(0, '23.716')] +[2024-07-05 00:08:32,134][45733] Updated weights for policy 0, policy_version 750 (0.0012) +[2024-07-05 00:08:35,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8469.7). Total num frames: 3100672. Throughput: 0: 2134.2. Samples: 774702. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:35,563][45457] Avg episode reward: [(0, '23.904')] +[2024-07-05 00:08:36,933][45733] Updated weights for policy 0, policy_version 760 (0.0011) +[2024-07-05 00:08:40,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8533.3, 300 sec: 8469.7). Total num frames: 3141632. Throughput: 0: 2133.9. Samples: 781034. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:40,564][45457] Avg episode reward: [(0, '24.201')] +[2024-07-05 00:08:41,784][45733] Updated weights for policy 0, policy_version 770 (0.0012) +[2024-07-05 00:08:45,564][45457] Fps is (10 sec: 8191.3, 60 sec: 8465.1, 300 sec: 8455.8). Total num frames: 3182592. Throughput: 0: 2135.2. Samples: 793924. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:08:45,568][45457] Avg episode reward: [(0, '25.184')] +[2024-07-05 00:08:46,559][45733] Updated weights for policy 0, policy_version 780 (0.0012) +[2024-07-05 00:08:50,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8533.3, 300 sec: 8469.7). Total num frames: 3227648. Throughput: 0: 2133.8. Samples: 806542. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:50,564][45457] Avg episode reward: [(0, '25.337')] +[2024-07-05 00:08:51,431][45733] Updated weights for policy 0, policy_version 790 (0.0011) +[2024-07-05 00:08:55,564][45457] Fps is (10 sec: 8601.6, 60 sec: 8533.2, 300 sec: 8455.8). Total num frames: 3268608. Throughput: 0: 2123.4. Samples: 812782. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:08:55,568][45457] Avg episode reward: [(0, '27.362')] +[2024-07-05 00:08:55,781][45720] Saving new best policy, reward=27.362! +[2024-07-05 00:08:56,340][45733] Updated weights for policy 0, policy_version 800 (0.0012) +[2024-07-05 00:09:00,563][45457] Fps is (10 sec: 8192.1, 60 sec: 8465.2, 300 sec: 8455.8). Total num frames: 3309568. Throughput: 0: 2118.3. Samples: 825294. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:00,564][45457] Avg episode reward: [(0, '27.047')] +[2024-07-05 00:09:01,150][45733] Updated weights for policy 0, policy_version 810 (0.0012) +[2024-07-05 00:09:05,562][45457] Fps is (10 sec: 8602.4, 60 sec: 8533.4, 300 sec: 8455.8). Total num frames: 3354624. Throughput: 0: 2117.7. Samples: 838096. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:05,564][45457] Avg episode reward: [(0, '26.206')] +[2024-07-05 00:09:06,047][45733] Updated weights for policy 0, policy_version 820 (0.0012) +[2024-07-05 00:09:10,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8465.1, 300 sec: 8455.8). Total num frames: 3395584. Throughput: 0: 2113.1. Samples: 844180. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:10,563][45457] Avg episode reward: [(0, '25.412')] +[2024-07-05 00:09:11,010][45733] Updated weights for policy 0, policy_version 830 (0.0012) +[2024-07-05 00:09:15,563][45457] Fps is (10 sec: 8191.9, 60 sec: 8465.1, 300 sec: 8455.8). Total num frames: 3436544. Throughput: 0: 2112.1. Samples: 856818. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:15,564][45457] Avg episode reward: [(0, '24.555')] +[2024-07-05 00:09:15,927][45733] Updated weights for policy 0, policy_version 840 (0.0012) +[2024-07-05 00:09:20,562][45457] Fps is (10 sec: 8192.0, 60 sec: 8465.1, 300 sec: 8455.8). Total num frames: 3477504. Throughput: 0: 2102.2. Samples: 869302. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:20,563][45457] Avg episode reward: [(0, '25.205')] +[2024-07-05 00:09:20,707][45733] Updated weights for policy 0, policy_version 850 (0.0011) +[2024-07-05 00:09:25,496][45733] Updated weights for policy 0, policy_version 860 (0.0012) +[2024-07-05 00:09:25,563][45457] Fps is (10 sec: 8601.7, 60 sec: 8465.1, 300 sec: 8469.7). Total num frames: 3522560. Throughput: 0: 2108.7. Samples: 875924. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:25,563][45457] Avg episode reward: [(0, '25.967')] +[2024-07-05 00:09:30,329][45733] Updated weights for policy 0, policy_version 870 (0.0012) +[2024-07-05 00:09:30,563][45457] Fps is (10 sec: 8601.4, 60 sec: 8465.0, 300 sec: 8455.8). Total num frames: 3563520. Throughput: 0: 2102.4. Samples: 888530. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:30,564][45457] Avg episode reward: [(0, '26.150')] +[2024-07-05 00:09:35,125][45733] Updated weights for policy 0, policy_version 880 (0.0012) +[2024-07-05 00:09:35,563][45457] Fps is (10 sec: 8192.0, 60 sec: 8396.8, 300 sec: 8455.8). Total num frames: 3604480. Throughput: 0: 2109.8. Samples: 901482. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:35,564][45457] Avg episode reward: [(0, '28.216')] +[2024-07-05 00:09:35,606][45720] Saving new best policy, reward=28.216! +[2024-07-05 00:09:39,996][45733] Updated weights for policy 0, policy_version 890 (0.0012) +[2024-07-05 00:09:40,563][45457] Fps is (10 sec: 8601.7, 60 sec: 8465.0, 300 sec: 8455.8). Total num frames: 3649536. Throughput: 0: 2108.9. Samples: 907680. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:40,564][45457] Avg episode reward: [(0, '27.075')] +[2024-07-05 00:09:44,929][45733] Updated weights for policy 0, policy_version 900 (0.0012) +[2024-07-05 00:09:45,563][45457] Fps is (10 sec: 8601.0, 60 sec: 8465.1, 300 sec: 8455.8). Total num frames: 3690496. Throughput: 0: 2110.1. Samples: 920250. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:45,564][45457] Avg episode reward: [(0, '28.084')] +[2024-07-05 00:09:49,793][45733] Updated weights for policy 0, policy_version 910 (0.0012) +[2024-07-05 00:09:50,564][45457] Fps is (10 sec: 8191.4, 60 sec: 8396.7, 300 sec: 8455.8). Total num frames: 3731456. Throughput: 0: 2102.3. Samples: 932702. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:50,568][45457] Avg episode reward: [(0, '26.819')] +[2024-07-05 00:09:54,696][45733] Updated weights for policy 0, policy_version 920 (0.0012) +[2024-07-05 00:09:55,564][45457] Fps is (10 sec: 8191.9, 60 sec: 8396.8, 300 sec: 8441.9). Total num frames: 3772416. Throughput: 0: 2105.6. Samples: 938934. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:09:55,566][45457] Avg episode reward: [(0, '27.790')] +[2024-07-05 00:09:59,567][45733] Updated weights for policy 0, policy_version 930 (0.0011) +[2024-07-05 00:10:00,563][45457] Fps is (10 sec: 8602.3, 60 sec: 8465.1, 300 sec: 8455.8). Total num frames: 3817472. Throughput: 0: 2109.3. Samples: 951736. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:10:00,563][45457] Avg episode reward: [(0, '27.043')] +[2024-07-05 00:10:04,437][45733] Updated weights for policy 0, policy_version 940 (0.0012) +[2024-07-05 00:10:05,563][45457] Fps is (10 sec: 8602.3, 60 sec: 8396.8, 300 sec: 8455.8). Total num frames: 3858432. Throughput: 0: 2111.0. Samples: 964298. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:10:05,563][45457] Avg episode reward: [(0, '26.886')] +[2024-07-05 00:10:09,362][45733] Updated weights for policy 0, policy_version 950 (0.0012) +[2024-07-05 00:10:10,563][45457] Fps is (10 sec: 8192.0, 60 sec: 8396.8, 300 sec: 8441.9). Total num frames: 3899392. Throughput: 0: 2102.3. Samples: 970528. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:10,564][45457] Avg episode reward: [(0, '28.959')] +[2024-07-05 00:10:10,792][45720] Saving new best policy, reward=28.959! +[2024-07-05 00:10:14,219][45733] Updated weights for policy 0, policy_version 960 (0.0011) +[2024-07-05 00:10:15,563][45457] Fps is (10 sec: 8191.4, 60 sec: 8396.7, 300 sec: 8441.9). Total num frames: 3940352. Throughput: 0: 2100.8. Samples: 983066. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:15,565][45457] Avg episode reward: [(0, '28.199')] +[2024-07-05 00:10:15,710][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000963_3944448.pth... +[2024-07-05 00:10:15,818][45720] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000469_1921024.pth +[2024-07-05 00:10:19,263][45733] Updated weights for policy 0, policy_version 970 (0.0012) +[2024-07-05 00:10:20,562][45457] Fps is (10 sec: 8192.1, 60 sec: 8396.8, 300 sec: 8441.9). Total num frames: 3981312. Throughput: 0: 2085.1. Samples: 995312. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 00:10:20,563][45457] Avg episode reward: [(0, '27.336')] +[2024-07-05 00:10:24,141][45733] Updated weights for policy 0, policy_version 980 (0.0012) +[2024-07-05 00:10:25,563][45457] Fps is (10 sec: 8192.7, 60 sec: 8328.5, 300 sec: 8428.1). Total num frames: 4022272. Throughput: 0: 2093.6. Samples: 1001892. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:25,564][45457] Avg episode reward: [(0, '26.246')] +[2024-07-05 00:10:29,082][45733] Updated weights for policy 0, policy_version 990 (0.0012) +[2024-07-05 00:10:30,562][45457] Fps is (10 sec: 8601.5, 60 sec: 8396.8, 300 sec: 8442.0). Total num frames: 4067328. Throughput: 0: 2090.6. Samples: 1014326. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:30,563][45457] Avg episode reward: [(0, '28.156')] +[2024-07-05 00:10:33,899][45733] Updated weights for policy 0, policy_version 1000 (0.0011) +[2024-07-05 00:10:35,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8396.8, 300 sec: 8441.9). Total num frames: 4108288. Throughput: 0: 2092.2. Samples: 1026850. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:35,564][45457] Avg episode reward: [(0, '28.749')] +[2024-07-05 00:10:38,767][45733] Updated weights for policy 0, policy_version 1010 (0.0012) +[2024-07-05 00:10:40,563][45457] Fps is (10 sec: 8192.0, 60 sec: 8328.5, 300 sec: 8428.0). Total num frames: 4149248. Throughput: 0: 2091.6. Samples: 1033056. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:40,564][45457] Avg episode reward: [(0, '28.596')] +[2024-07-05 00:10:43,658][45733] Updated weights for policy 0, policy_version 1020 (0.0012) +[2024-07-05 00:10:45,563][45457] Fps is (10 sec: 8192.1, 60 sec: 8328.6, 300 sec: 8428.0). Total num frames: 4190208. Throughput: 0: 2093.1. Samples: 1045926. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:45,563][45457] Avg episode reward: [(0, '27.802')] +[2024-07-05 00:10:48,601][45733] Updated weights for policy 0, policy_version 1030 (0.0012) +[2024-07-05 00:10:50,562][45457] Fps is (10 sec: 8601.7, 60 sec: 8396.9, 300 sec: 8428.0). Total num frames: 4235264. Throughput: 0: 2091.1. Samples: 1058398. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:50,563][45457] Avg episode reward: [(0, '25.825')] +[2024-07-05 00:10:53,300][45733] Updated weights for policy 0, policy_version 1040 (0.0012) +[2024-07-05 00:10:55,563][45457] Fps is (10 sec: 8601.5, 60 sec: 8396.9, 300 sec: 8428.0). Total num frames: 4276224. Throughput: 0: 2094.8. Samples: 1064792. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:10:55,564][45457] Avg episode reward: [(0, '25.309')] +[2024-07-05 00:10:58,020][45733] Updated weights for policy 0, policy_version 1050 (0.0014) +[2024-07-05 00:11:00,563][45457] Fps is (10 sec: 8601.6, 60 sec: 8396.8, 300 sec: 8441.9). Total num frames: 4321280. Throughput: 0: 2106.4. Samples: 1077852. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:11:00,564][45457] Avg episode reward: [(0, '26.441')] +[2024-07-05 00:11:02,854][45733] Updated weights for policy 0, policy_version 1060 (0.0023) +[2024-07-05 00:11:05,564][45457] Fps is (10 sec: 7372.2, 60 sec: 8191.9, 300 sec: 8386.4). Total num frames: 4349952. Throughput: 0: 2055.4. Samples: 1087806. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:11:05,569][45457] Avg episode reward: [(0, '26.663')] +[2024-07-05 00:11:10,563][45457] Fps is (10 sec: 3686.1, 60 sec: 7645.8, 300 sec: 8275.3). Total num frames: 4358144. Throughput: 0: 1959.1. Samples: 1090052. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:11:10,568][45457] Avg episode reward: [(0, '26.834')] +[2024-07-05 00:11:15,564][45457] Fps is (10 sec: 2457.6, 60 sec: 7236.3, 300 sec: 8192.0). Total num frames: 4374528. Throughput: 0: 1770.4. Samples: 1093996. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 00:11:15,568][45457] Avg episode reward: [(0, '27.659')] +[2024-07-05 00:11:18,343][45733] Updated weights for policy 0, policy_version 1070 (0.0090) +[2024-07-05 00:11:20,563][45457] Fps is (10 sec: 2867.2, 60 sec: 6758.3, 300 sec: 8080.9). Total num frames: 4386816. Throughput: 0: 1579.4. Samples: 1097926. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 00:11:20,568][45457] Avg episode reward: [(0, '27.766')] +[2024-07-05 00:11:25,564][45457] Fps is (10 sec: 2457.6, 60 sec: 6280.5, 300 sec: 7983.7). Total num frames: 4399104. Throughput: 0: 1485.8. Samples: 1099918. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 00:11:25,568][45457] Avg episode reward: [(0, '27.233')] +[2024-07-05 00:11:30,564][45457] Fps is (10 sec: 2457.5, 60 sec: 5734.3, 300 sec: 7886.5). Total num frames: 4411392. Throughput: 0: 1287.7. Samples: 1103874. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 00:11:30,567][45457] Avg episode reward: [(0, '27.273')] +[2024-07-05 00:11:33,820][45733] Updated weights for policy 0, policy_version 1080 (0.0088) +[2024-07-05 00:11:35,564][45457] Fps is (10 sec: 2867.2, 60 sec: 5324.7, 300 sec: 7803.2). Total num frames: 4427776. Throughput: 0: 1099.1. Samples: 1107860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 00:11:35,569][45457] Avg episode reward: [(0, '25.135')] +[2024-07-05 00:11:40,564][45457] Fps is (10 sec: 2867.2, 60 sec: 4846.9, 300 sec: 7706.0). Total num frames: 4440064. Throughput: 0: 1001.4. Samples: 1109854. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 00:11:40,568][45457] Avg episode reward: [(0, '24.835')] +[2024-07-05 00:11:45,564][45457] Fps is (10 sec: 2457.5, 60 sec: 4369.0, 300 sec: 7608.8). Total num frames: 4452352. Throughput: 0: 799.0. Samples: 1113810. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 00:11:45,568][45457] Avg episode reward: [(0, '25.513')] +[2024-07-05 00:11:49,287][45733] Updated weights for policy 0, policy_version 1090 (0.0086) +[2024-07-05 00:11:50,563][45457] Fps is (10 sec: 2457.6, 60 sec: 3822.9, 300 sec: 7511.6). Total num frames: 4464640. Throughput: 0: 666.1. Samples: 1117780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 00:11:50,567][45457] Avg episode reward: [(0, '25.722')] +[2024-07-05 00:11:55,564][45457] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 7414.4). Total num frames: 4481024. Throughput: 0: 660.4. Samples: 1119770. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 00:11:55,569][45457] Avg episode reward: [(0, '25.471')] +[2024-07-05 00:12:00,564][45457] Fps is (10 sec: 2867.1, 60 sec: 2867.2, 300 sec: 7317.2). Total num frames: 4493312. Throughput: 0: 659.1. Samples: 1123656. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 00:12:00,569][45457] Avg episode reward: [(0, '26.600')] +[2024-07-05 00:12:04,895][45733] Updated weights for policy 0, policy_version 1100 (0.0089) +[2024-07-05 00:12:05,564][45457] Fps is (10 sec: 2457.7, 60 sec: 2594.1, 300 sec: 7220.0). Total num frames: 4505600. Throughput: 0: 656.1. Samples: 1127452. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 00:12:05,568][45457] Avg episode reward: [(0, '26.292')] +[2024-07-05 00:12:10,563][45457] Fps is (10 sec: 3686.7, 60 sec: 2867.2, 300 sec: 7150.7). Total num frames: 4530176. Throughput: 0: 664.4. Samples: 1129816. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:12:10,564][45457] Avg episode reward: [(0, '26.955')] +[2024-07-05 00:12:12,183][45733] Updated weights for policy 0, policy_version 1110 (0.0041) +[2024-07-05 00:12:15,563][45457] Fps is (10 sec: 6963.8, 60 sec: 3345.1, 300 sec: 7164.5). Total num frames: 4575232. Throughput: 0: 829.2. Samples: 1141186. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:12:15,564][45457] Avg episode reward: [(0, '29.109')] +[2024-07-05 00:12:15,568][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001117_4575232.pth... +[2024-07-05 00:12:15,695][45720] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000716_2932736.pth +[2024-07-05 00:12:15,710][45720] Saving new best policy, reward=29.109! +[2024-07-05 00:12:17,018][45733] Updated weights for policy 0, policy_version 1120 (0.0015) +[2024-07-05 00:12:20,563][45457] Fps is (10 sec: 8601.7, 60 sec: 3823.0, 300 sec: 7164.5). Total num frames: 4616192. Throughput: 0: 1025.5. Samples: 1154008. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:20,564][45457] Avg episode reward: [(0, '28.579')] +[2024-07-05 00:12:21,727][45733] Updated weights for policy 0, policy_version 1130 (0.0012) +[2024-07-05 00:12:25,562][45457] Fps is (10 sec: 8601.7, 60 sec: 4369.1, 300 sec: 7164.5). Total num frames: 4661248. Throughput: 0: 1128.0. Samples: 1160612. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:25,563][45457] Avg episode reward: [(0, '27.690')] +[2024-07-05 00:12:26,489][45733] Updated weights for policy 0, policy_version 1140 (0.0012) +[2024-07-05 00:12:30,562][45457] Fps is (10 sec: 8601.7, 60 sec: 4847.0, 300 sec: 7164.5). Total num frames: 4702208. Throughput: 0: 1323.5. Samples: 1173364. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:30,563][45457] Avg episode reward: [(0, '28.016')] +[2024-07-05 00:12:31,297][45733] Updated weights for policy 0, policy_version 1150 (0.0012) +[2024-07-05 00:12:35,563][45457] Fps is (10 sec: 8191.3, 60 sec: 5256.6, 300 sec: 7164.5). Total num frames: 4743168. Throughput: 0: 1524.7. Samples: 1186390. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:35,568][45457] Avg episode reward: [(0, '29.795')] +[2024-07-05 00:12:35,576][45720] Saving new best policy, reward=29.795! +[2024-07-05 00:12:36,135][45733] Updated weights for policy 0, policy_version 1160 (0.0012) +[2024-07-05 00:12:40,563][45457] Fps is (10 sec: 8600.7, 60 sec: 5802.7, 300 sec: 7164.5). Total num frames: 4788224. Throughput: 0: 1619.8. Samples: 1192662. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:40,565][45457] Avg episode reward: [(0, '30.795')] +[2024-07-05 00:12:40,865][45720] Saving new best policy, reward=30.795! +[2024-07-05 00:12:40,868][45733] Updated weights for policy 0, policy_version 1170 (0.0012) +[2024-07-05 00:12:45,563][45457] Fps is (10 sec: 8602.2, 60 sec: 6280.7, 300 sec: 7164.5). Total num frames: 4829184. Throughput: 0: 1813.1. Samples: 1205244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:45,563][45457] Avg episode reward: [(0, '30.346')] +[2024-07-05 00:12:45,736][45733] Updated weights for policy 0, policy_version 1180 (0.0011) +[2024-07-05 00:12:50,429][45733] Updated weights for policy 0, policy_version 1190 (0.0011) +[2024-07-05 00:12:50,562][45457] Fps is (10 sec: 8602.5, 60 sec: 6826.8, 300 sec: 7178.4). Total num frames: 4874240. Throughput: 0: 2019.3. Samples: 1218318. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 00:12:50,563][45457] Avg episode reward: [(0, '29.584')] +[2024-07-05 00:12:55,286][45733] Updated weights for policy 0, policy_version 1200 (0.0011) +[2024-07-05 00:12:55,564][45457] Fps is (10 sec: 8600.8, 60 sec: 7236.3, 300 sec: 7164.5). Total num frames: 4915200. Throughput: 0: 2105.2. Samples: 1224550. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:12:55,568][45457] Avg episode reward: [(0, '27.384')] +[2024-07-05 00:13:00,122][45733] Updated weights for policy 0, policy_version 1210 (0.0012) +[2024-07-05 00:13:00,562][45457] Fps is (10 sec: 8192.0, 60 sec: 7714.3, 300 sec: 7164.5). Total num frames: 4956160. Throughput: 0: 2139.0. Samples: 1237442. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:13:00,563][45457] Avg episode reward: [(0, '26.764')] +[2024-07-05 00:13:05,041][45733] Updated weights for policy 0, policy_version 1220 (0.0012) +[2024-07-05 00:13:05,563][45457] Fps is (10 sec: 8602.3, 60 sec: 8260.4, 300 sec: 7164.5). Total num frames: 5001216. Throughput: 0: 2131.8. Samples: 1249940. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 00:13:05,564][45457] Avg episode reward: [(0, '26.776')] +[2024-07-05 00:13:06,018][45720] Stopping Batcher_0... +[2024-07-05 00:13:06,019][45720] Loop batcher_evt_loop terminating... +[2024-07-05 00:13:06,019][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth... +[2024-07-05 00:13:06,021][45457] Component Batcher_0 stopped! +[2024-07-05 00:13:06,027][45735] Stopping RolloutWorker_w1... +[2024-07-05 00:13:06,027][45734] Stopping RolloutWorker_w0... +[2024-07-05 00:13:06,027][45740] Stopping RolloutWorker_w6... +[2024-07-05 00:13:06,027][45739] Stopping RolloutWorker_w5... +[2024-07-05 00:13:06,027][45741] Stopping RolloutWorker_w7... +[2024-07-05 00:13:06,027][45735] Loop rollout_proc1_evt_loop terminating... +[2024-07-05 00:13:06,027][45738] Stopping RolloutWorker_w4... +[2024-07-05 00:13:06,027][45734] Loop rollout_proc0_evt_loop terminating... +[2024-07-05 00:13:06,027][45740] Loop rollout_proc6_evt_loop terminating... +[2024-07-05 00:13:06,027][45739] Loop rollout_proc5_evt_loop terminating... +[2024-07-05 00:13:06,028][45741] Loop rollout_proc7_evt_loop terminating... +[2024-07-05 00:13:06,028][45738] Loop rollout_proc4_evt_loop terminating... +[2024-07-05 00:13:06,028][45737] Stopping RolloutWorker_w3... +[2024-07-05 00:13:06,028][45737] Loop rollout_proc3_evt_loop terminating... +[2024-07-05 00:13:06,028][45736] Stopping RolloutWorker_w2... +[2024-07-05 00:13:06,027][45457] Component RolloutWorker_w1 stopped! +[2024-07-05 00:13:06,028][45736] Loop rollout_proc2_evt_loop terminating... +[2024-07-05 00:13:06,028][45457] Component RolloutWorker_w0 stopped! +[2024-07-05 00:13:06,029][45457] Component RolloutWorker_w6 stopped! +[2024-07-05 00:13:06,030][45457] Component RolloutWorker_w5 stopped! +[2024-07-05 00:13:06,030][45457] Component RolloutWorker_w7 stopped! +[2024-07-05 00:13:06,031][45457] Component RolloutWorker_w4 stopped! +[2024-07-05 00:13:06,032][45457] Component RolloutWorker_w3 stopped! +[2024-07-05 00:13:06,032][45457] Component RolloutWorker_w2 stopped! +[2024-07-05 00:13:06,051][45733] Weights refcount: 2 0 +[2024-07-05 00:13:06,053][45733] Stopping InferenceWorker_p0-w0... +[2024-07-05 00:13:06,053][45733] Loop inference_proc0-0_evt_loop terminating... +[2024-07-05 00:13:06,053][45457] Component InferenceWorker_p0-w0 stopped! +[2024-07-05 00:13:06,115][45720] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000000963_3944448.pth +[2024-07-05 00:13:06,125][45720] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth... +[2024-07-05 00:13:06,251][45720] Stopping LearnerWorker_p0... +[2024-07-05 00:13:06,252][45720] Loop learner_proc0_evt_loop terminating... +[2024-07-05 00:13:06,251][45457] Component LearnerWorker_p0 stopped! +[2024-07-05 00:13:06,253][45457] Waiting for process learner_proc0 to stop... +[2024-07-05 00:13:07,243][45457] Waiting for process inference_proc0-0 to join... +[2024-07-05 00:13:07,245][45457] Waiting for process rollout_proc0 to join... +[2024-07-05 00:13:07,246][45457] Waiting for process rollout_proc1 to join... +[2024-07-05 00:13:07,247][45457] Waiting for process rollout_proc2 to join... +[2024-07-05 00:13:07,247][45457] Waiting for process rollout_proc3 to join... +[2024-07-05 00:13:07,248][45457] Waiting for process rollout_proc4 to join... +[2024-07-05 00:13:07,249][45457] Waiting for process rollout_proc5 to join... +[2024-07-05 00:13:07,249][45457] Waiting for process rollout_proc6 to join... +[2024-07-05 00:13:07,250][45457] Waiting for process rollout_proc7 to join... +[2024-07-05 00:13:07,250][45457] Batcher 0 profile tree view: +batching: 10.6002, releasing_batches: 0.0368 +[2024-07-05 00:13:07,251][45457] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0000 - wait_policy_total: 49.6346 -update_model: 16.5131 - weight_update: 0.0014 -one_step: 0.0024 - handle_policy_step: 960.5359 - deserialize: 72.9740, stack: 5.4390, obs_to_device_normalize: 227.9648, forward: 450.2314, send_messages: 48.9727 - prepare_outputs: 121.9974 - to_cpu: 73.1486 -[2024-07-05 12:10:55,753][25826] Learner 0 profile tree view: -misc: 0.0236, prepare_batch: 102.7532 -train: 229.6119 - epoch_init: 0.0198, minibatch_init: 0.0280, losses_postprocess: 1.3501, kl_divergence: 1.4897, after_optimizer: 1.5800 - calculate_losses: 81.9569 - losses_init: 0.0123, forward_head: 3.6887, bptt_initial: 62.5628, tail: 3.3569, advantages_returns: 1.0618, losses: 5.2979 - bptt: 5.1074 - bptt_forward_core: 4.8908 - update: 140.7312 - clip: 5.2909 -[2024-07-05 12:10:55,753][25826] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.4572, enqueue_policy_requests: 30.7168, env_step: 493.1736, overhead: 50.5816, complete_rollouts: 1.2357 -save_policy_outputs: 37.2109 - split_output_tensors: 17.2829 -[2024-07-05 12:10:55,753][25826] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.4814, enqueue_policy_requests: 31.9675, env_step: 507.5894, overhead: 52.5744, complete_rollouts: 1.2642 -save_policy_outputs: 37.6105 - split_output_tensors: 17.6837 -[2024-07-05 12:10:55,754][25826] Loop Runner_EvtLoop terminating... -[2024-07-05 12:10:55,754][25826] Runner profile tree view: -main_loop: 1071.2399 -[2024-07-05 12:10:55,754][25826] Collected {0: 150011904}, FPS: 46670.9 -[2024-07-05 12:13:04,942][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 12:13:04,943][25826] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 12:13:04,944][25826] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 12:13:04,944][25826] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 12:13:04,944][25826] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 12:13:04,945][25826] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 12:13:04,945][25826] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 12:13:04,945][25826] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 12:13:04,946][25826] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 12:13:04,946][25826] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 12:13:04,946][25826] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 12:13:04,947][25826] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 12:13:04,947][25826] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 12:13:04,947][25826] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 12:13:04,948][25826] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 12:13:04,964][25826] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:13:04,966][25826] RunningMeanStd input shape: (1,) -[2024-07-05 12:13:04,979][25826] ConvEncoder: input_channels=3 -[2024-07-05 12:13:05,013][25826] Conv encoder output size: 512 -[2024-07-05 12:13:05,014][25826] Policy head output size: 512 -[2024-07-05 12:13:05,042][25826] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020754_150011904.pth... -[2024-07-05 12:13:05,694][25826] Num frames 100... -[2024-07-05 12:13:05,754][25826] Num frames 200... -[2024-07-05 12:13:05,814][25826] Num frames 300... -[2024-07-05 12:13:05,875][25826] Num frames 400... -[2024-07-05 12:13:05,935][25826] Num frames 500... -[2024-07-05 12:13:05,994][25826] Num frames 600... -[2024-07-05 12:13:06,054][25826] Num frames 700... -[2024-07-05 12:13:06,113][25826] Num frames 800... -[2024-07-05 12:13:06,174][25826] Num frames 900... -[2024-07-05 12:13:06,234][25826] Num frames 1000... -[2024-07-05 12:13:06,293][25826] Num frames 1100... -[2024-07-05 12:13:06,352][25826] Num frames 1200... -[2024-07-05 12:13:06,413][25826] Num frames 1300... -[2024-07-05 12:13:06,473][25826] Num frames 1400... -[2024-07-05 12:13:06,535][25826] Num frames 1500... -[2024-07-05 12:13:06,597][25826] Num frames 1600... -[2024-07-05 12:13:06,677][25826] Num frames 1700... -[2024-07-05 12:13:06,743][25826] Num frames 1800... -[2024-07-05 12:13:06,807][25826] Num frames 1900... -[2024-07-05 12:13:06,875][25826] Num frames 2000... -[2024-07-05 12:13:06,942][25826] Num frames 2100... -[2024-07-05 12:13:06,993][25826] Avg episode rewards: #0: 62.999, true rewards: #0: 21.000 -[2024-07-05 12:13:06,994][25826] Avg episode reward: 62.999, avg true_objective: 21.000 -[2024-07-05 12:13:07,063][25826] Num frames 2200... -[2024-07-05 12:13:07,124][25826] Num frames 2300... -[2024-07-05 12:13:07,185][25826] Num frames 2400... -[2024-07-05 12:13:07,246][25826] Num frames 2500... -[2024-07-05 12:13:07,307][25826] Num frames 2600... -[2024-07-05 12:13:07,367][25826] Num frames 2700... -[2024-07-05 12:13:07,429][25826] Num frames 2800... -[2024-07-05 12:13:07,490][25826] Num frames 2900... -[2024-07-05 12:13:07,550][25826] Num frames 3000... -[2024-07-05 12:13:07,612][25826] Num frames 3100... -[2024-07-05 12:13:07,673][25826] Num frames 3200... -[2024-07-05 12:13:07,732][25826] Num frames 3300... -[2024-07-05 12:13:07,791][25826] Num frames 3400... -[2024-07-05 12:13:07,851][25826] Num frames 3500... -[2024-07-05 12:13:07,910][25826] Num frames 3600... -[2024-07-05 12:13:07,972][25826] Num frames 3700... -[2024-07-05 12:13:08,032][25826] Num frames 3800... -[2024-07-05 12:13:08,093][25826] Num frames 3900... -[2024-07-05 12:13:08,151][25826] Num frames 4000... -[2024-07-05 12:13:08,214][25826] Num frames 4100... -[2024-07-05 12:13:08,275][25826] Num frames 4200... -[2024-07-05 12:13:08,326][25826] Avg episode rewards: #0: 61.999, true rewards: #0: 21.000 -[2024-07-05 12:13:08,328][25826] Avg episode reward: 61.999, avg true_objective: 21.000 -[2024-07-05 12:13:08,393][25826] Num frames 4300... -[2024-07-05 12:13:08,452][25826] Num frames 4400... -[2024-07-05 12:13:08,513][25826] Num frames 4500... -[2024-07-05 12:13:08,573][25826] Num frames 4600... -[2024-07-05 12:13:08,630][25826] Num frames 4700... -[2024-07-05 12:13:08,689][25826] Num frames 4800... -[2024-07-05 12:13:08,751][25826] Num frames 4900... -[2024-07-05 12:13:08,814][25826] Num frames 5000... -[2024-07-05 12:13:08,879][25826] Num frames 5100... -[2024-07-05 12:13:08,970][25826] Avg episode rewards: #0: 48.199, true rewards: #0: 17.200 -[2024-07-05 12:13:08,971][25826] Avg episode reward: 48.199, avg true_objective: 17.200 -[2024-07-05 12:13:09,000][25826] Num frames 5200... -[2024-07-05 12:13:09,060][25826] Num frames 5300... -[2024-07-05 12:13:09,120][25826] Num frames 5400... -[2024-07-05 12:13:09,182][25826] Num frames 5500... -[2024-07-05 12:13:09,241][25826] Num frames 5600... -[2024-07-05 12:13:09,300][25826] Num frames 5700... -[2024-07-05 12:13:09,359][25826] Num frames 5800... -[2024-07-05 12:13:09,418][25826] Num frames 5900... -[2024-07-05 12:13:09,490][25826] Avg episode rewards: #0: 40.069, true rewards: #0: 14.820 -[2024-07-05 12:13:09,491][25826] Avg episode reward: 40.069, avg true_objective: 14.820 -[2024-07-05 12:13:09,538][25826] Num frames 6000... -[2024-07-05 12:13:09,599][25826] Num frames 6100... -[2024-07-05 12:13:09,667][25826] Num frames 6200... -[2024-07-05 12:13:09,735][25826] Num frames 6300... -[2024-07-05 12:13:09,795][25826] Num frames 6400... -[2024-07-05 12:13:09,854][25826] Num frames 6500... -[2024-07-05 12:13:09,915][25826] Num frames 6600... -[2024-07-05 12:13:09,975][25826] Num frames 6700... -[2024-07-05 12:13:10,034][25826] Num frames 6800... -[2024-07-05 12:13:10,094][25826] Num frames 6900... -[2024-07-05 12:13:10,153][25826] Num frames 7000... -[2024-07-05 12:13:10,213][25826] Num frames 7100... -[2024-07-05 12:13:10,272][25826] Num frames 7200... -[2024-07-05 12:13:10,332][25826] Num frames 7300... -[2024-07-05 12:13:10,393][25826] Num frames 7400... -[2024-07-05 12:13:10,454][25826] Num frames 7500... -[2024-07-05 12:13:10,514][25826] Num frames 7600... -[2024-07-05 12:13:10,575][25826] Num frames 7700... -[2024-07-05 12:13:10,635][25826] Num frames 7800... -[2024-07-05 12:13:10,699][25826] Num frames 7900... -[2024-07-05 12:13:10,760][25826] Num frames 8000... -[2024-07-05 12:13:10,832][25826] Avg episode rewards: #0: 45.455, true rewards: #0: 16.056 -[2024-07-05 12:13:10,834][25826] Avg episode reward: 45.455, avg true_objective: 16.056 -[2024-07-05 12:13:10,886][25826] Num frames 8100... -[2024-07-05 12:13:10,946][25826] Num frames 8200... -[2024-07-05 12:13:11,004][25826] Num frames 8300... -[2024-07-05 12:13:11,063][25826] Num frames 8400... -[2024-07-05 12:13:11,124][25826] Num frames 8500... -[2024-07-05 12:13:11,187][25826] Num frames 8600... -[2024-07-05 12:13:11,246][25826] Num frames 8700... -[2024-07-05 12:13:11,317][25826] Num frames 8800... -[2024-07-05 12:13:11,378][25826] Num frames 8900... -[2024-07-05 12:13:11,466][25826] Avg episode rewards: #0: 40.926, true rewards: #0: 14.927 -[2024-07-05 12:13:11,468][25826] Avg episode reward: 40.926, avg true_objective: 14.927 -[2024-07-05 12:13:11,503][25826] Num frames 9000... -[2024-07-05 12:13:11,565][25826] Num frames 9100... -[2024-07-05 12:13:11,630][25826] Num frames 9200... -[2024-07-05 12:13:11,694][25826] Num frames 9300... -[2024-07-05 12:13:11,754][25826] Num frames 9400... -[2024-07-05 12:13:11,814][25826] Num frames 9500... -[2024-07-05 12:13:11,872][25826] Num frames 9600... -[2024-07-05 12:13:11,933][25826] Num frames 9700... -[2024-07-05 12:13:11,992][25826] Num frames 9800... -[2024-07-05 12:13:12,052][25826] Num frames 9900... -[2024-07-05 12:13:12,112][25826] Num frames 10000... -[2024-07-05 12:13:12,171][25826] Num frames 10100... -[2024-07-05 12:13:12,229][25826] Num frames 10200... -[2024-07-05 12:13:12,285][25826] Avg episode rewards: #0: 39.005, true rewards: #0: 14.577 -[2024-07-05 12:13:12,286][25826] Avg episode reward: 39.005, avg true_objective: 14.577 -[2024-07-05 12:13:12,344][25826] Num frames 10300... -[2024-07-05 12:13:12,404][25826] Num frames 10400... -[2024-07-05 12:13:12,466][25826] Num frames 10500... -[2024-07-05 12:13:12,530][25826] Num frames 10600... -[2024-07-05 12:13:12,593][25826] Num frames 10700... -[2024-07-05 12:13:12,654][25826] Num frames 10800... -[2024-07-05 12:13:12,716][25826] Num frames 10900... -[2024-07-05 12:13:12,777][25826] Num frames 11000... -[2024-07-05 12:13:12,837][25826] Num frames 11100... -[2024-07-05 12:13:12,920][25826] Num frames 11200... -[2024-07-05 12:13:12,982][25826] Num frames 11300... -[2024-07-05 12:13:13,044][25826] Num frames 11400... -[2024-07-05 12:13:13,104][25826] Num frames 11500... -[2024-07-05 12:13:13,164][25826] Num frames 11600... -[2024-07-05 12:13:13,227][25826] Num frames 11700... -[2024-07-05 12:13:13,289][25826] Num frames 11800... -[2024-07-05 12:13:13,346][25826] Num frames 11900... -[2024-07-05 12:13:13,408][25826] Num frames 12000... -[2024-07-05 12:13:13,470][25826] Num frames 12100... -[2024-07-05 12:13:13,532][25826] Num frames 12200... -[2024-07-05 12:13:13,593][25826] Num frames 12300... -[2024-07-05 12:13:13,649][25826] Avg episode rewards: #0: 42.004, true rewards: #0: 15.380 -[2024-07-05 12:13:13,651][25826] Avg episode reward: 42.004, avg true_objective: 15.380 -[2024-07-05 12:13:13,714][25826] Num frames 12400... -[2024-07-05 12:13:13,773][25826] Num frames 12500... -[2024-07-05 12:13:13,834][25826] Num frames 12600... -[2024-07-05 12:13:13,897][25826] Num frames 12700... -[2024-07-05 12:13:13,956][25826] Num frames 12800... -[2024-07-05 12:13:14,016][25826] Num frames 12900... -[2024-07-05 12:13:14,076][25826] Num frames 13000... -[2024-07-05 12:13:14,136][25826] Num frames 13100... -[2024-07-05 12:13:14,196][25826] Num frames 13200... -[2024-07-05 12:13:14,258][25826] Num frames 13300... -[2024-07-05 12:13:14,320][25826] Num frames 13400... -[2024-07-05 12:13:14,379][25826] Num frames 13500... -[2024-07-05 12:13:14,439][25826] Num frames 13600... -[2024-07-05 12:13:14,502][25826] Num frames 13700... -[2024-07-05 12:13:14,562][25826] Num frames 13800... -[2024-07-05 12:13:14,622][25826] Num frames 13900... -[2024-07-05 12:13:14,682][25826] Num frames 14000... -[2024-07-05 12:13:14,740][25826] Num frames 14100... -[2024-07-05 12:13:14,845][25826] Avg episode rewards: #0: 42.420, true rewards: #0: 15.754 -[2024-07-05 12:13:14,847][25826] Avg episode reward: 42.420, avg true_objective: 15.754 -[2024-07-05 12:13:14,868][25826] Num frames 14200... -[2024-07-05 12:13:14,928][25826] Num frames 14300... -[2024-07-05 12:13:14,991][25826] Num frames 14400... -[2024-07-05 12:13:15,050][25826] Num frames 14500... -[2024-07-05 12:13:15,107][25826] Num frames 14600... -[2024-07-05 12:13:15,168][25826] Num frames 14700... -[2024-07-05 12:13:15,228][25826] Num frames 14800... -[2024-07-05 12:13:15,289][25826] Num frames 14900... -[2024-07-05 12:13:15,348][25826] Num frames 15000... -[2024-07-05 12:13:15,408][25826] Num frames 15100... -[2024-07-05 12:13:15,468][25826] Num frames 15200... -[2024-07-05 12:13:15,528][25826] Num frames 15300... -[2024-07-05 12:13:15,588][25826] Num frames 15400... -[2024-07-05 12:13:15,646][25826] Num frames 15500... -[2024-07-05 12:13:15,708][25826] Num frames 15600... -[2024-07-05 12:13:15,771][25826] Num frames 15700... -[2024-07-05 12:13:15,832][25826] Num frames 15800... -[2024-07-05 12:13:15,895][25826] Num frames 15900... -[2024-07-05 12:13:15,956][25826] Num frames 16000... -[2024-07-05 12:13:16,018][25826] Num frames 16100... -[2024-07-05 12:13:16,093][25826] Num frames 16200... -[2024-07-05 12:13:16,197][25826] Avg episode rewards: #0: 44.278, true rewards: #0: 16.279 -[2024-07-05 12:13:16,198][25826] Avg episode reward: 44.278, avg true_objective: 16.279 -[2024-07-05 12:13:32,993][25826] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 12:14:40,838][25826] Environment doom_basic already registered, overwriting... -[2024-07-05 12:14:40,840][25826] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 12:14:40,840][25826] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 12:14:40,841][25826] Environment doom_dm already registered, overwriting... -[2024-07-05 12:14:40,841][25826] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 12:14:40,841][25826] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 12:14:40,842][25826] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 12:14:40,842][25826] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 12:14:40,843][25826] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 12:14:40,843][25826] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 12:14:40,843][25826] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 12:14:40,844][25826] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 12:14:40,844][25826] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 12:14:40,844][25826] Environment doom_battle already registered, overwriting... -[2024-07-05 12:14:40,845][25826] Environment doom_battle2 already registered, overwriting... -[2024-07-05 12:14:40,845][25826] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 12:14:40,845][25826] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 12:14:40,846][25826] Environment doom_duel already registered, overwriting... -[2024-07-05 12:14:40,846][25826] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 12:14:40,846][25826] Environment doom_benchmark already registered, overwriting... -[2024-07-05 12:14:40,847][25826] register_encoder_factory: -[2024-07-05 12:14:40,855][25826] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 12:14:40,857][25826] Overriding arg 'train_for_env_steps' with value 200000000 passed from command line -[2024-07-05 12:14:40,863][25826] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 12:14:40,864][25826] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 12:14:40,865][25826] Weights and Biases integration disabled -[2024-07-05 12:14:40,867][25826] Environment var CUDA_VISIBLE_DEVICES is 0 + wait_policy_total: 3.5569 +update_model: 4.1065 + weight_update: 0.0011 +one_step: 0.0028 + handle_policy_step: 618.6853 + deserialize: 8.4782, stack: 1.2233, obs_to_device_normalize: 101.5059, forward: 377.7692, send_messages: 11.8519 + prepare_outputs: 109.2451 + to_cpu: 98.5216 +[2024-07-05 00:13:07,252][45457] Learner 0 profile tree view: +misc: 0.0084, prepare_batch: 23.2111 +train: 512.3367 + epoch_init: 0.0171, minibatch_init: 0.0237, losses_postprocess: 0.4656, kl_divergence: 0.2496, after_optimizer: 328.6709 + calculate_losses: 172.7099 + losses_init: 0.0117, forward_head: 6.1247, bptt_initial: 162.9234, tail: 0.5551, advantages_returns: 0.1363, losses: 1.7374 + bptt: 0.9351 + bptt_forward_core: 0.8944 + update: 9.5899 + clip: 1.1686 +[2024-07-05 00:13:07,252][45457] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.1612, enqueue_policy_requests: 8.4153, env_step: 109.0174, overhead: 10.2251, complete_rollouts: 0.2774 +save_policy_outputs: 10.4592 + split_output_tensors: 4.8611 +[2024-07-05 00:13:07,252][45457] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.1197, enqueue_policy_requests: 8.3571, env_step: 116.6297, overhead: 9.9265, complete_rollouts: 0.2259 +save_policy_outputs: 10.4195 + split_output_tensors: 4.8793 +[2024-07-05 00:13:07,253][45457] Loop Runner_EvtLoop terminating... +[2024-07-05 00:13:07,254][45457] Runner profile tree view: +main_loop: 646.3090 +[2024-07-05 00:13:07,254][45457] Collected {0: 5005312}, FPS: 7744.5 +[2024-07-05 00:13:30,505][45457] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 00:13:30,507][45457] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 00:13:30,508][45457] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 00:13:30,508][45457] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 00:13:30,509][45457] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:13:30,509][45457] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 00:13:30,510][45457] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:13:30,511][45457] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 00:13:30,511][45457] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-07-05 00:13:30,512][45457] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-07-05 00:13:30,512][45457] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 00:13:30,512][45457] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 00:13:30,513][45457] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 00:13:30,513][45457] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 00:13:30,513][45457] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 00:13:30,540][45457] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 00:13:30,542][45457] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 00:13:30,544][45457] RunningMeanStd input shape: (1,) +[2024-07-05 00:13:30,559][45457] Num input channels: 3 +[2024-07-05 00:13:30,573][45457] Convolutional layer output size: 4608 +[2024-07-05 00:13:30,601][45457] Policy head output size: 512 +[2024-07-05 00:13:30,838][45457] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth... +[2024-07-05 00:13:31,866][45457] Num frames 100... +[2024-07-05 00:13:31,987][45457] Num frames 200... +[2024-07-05 00:13:32,107][45457] Num frames 300... +[2024-07-05 00:13:32,245][45457] Num frames 400... +[2024-07-05 00:13:32,367][45457] Num frames 500... +[2024-07-05 00:13:32,490][45457] Num frames 600... +[2024-07-05 00:13:32,614][45457] Num frames 700... +[2024-07-05 00:13:32,733][45457] Num frames 800... +[2024-07-05 00:13:32,829][45457] Num frames 900... +[2024-07-05 00:13:32,926][45457] Num frames 1000... +[2024-07-05 00:13:33,022][45457] Num frames 1100... +[2024-07-05 00:13:33,124][45457] Num frames 1200... +[2024-07-05 00:13:33,228][45457] Num frames 1300... +[2024-07-05 00:13:33,330][45457] Num frames 1400... +[2024-07-05 00:13:33,429][45457] Num frames 1500... +[2024-07-05 00:13:33,528][45457] Num frames 1600... +[2024-07-05 00:13:33,624][45457] Num frames 1700... +[2024-07-05 00:13:33,718][45457] Num frames 1800... +[2024-07-05 00:13:33,817][45457] Num frames 1900... +[2024-07-05 00:13:33,912][45457] Num frames 2000... +[2024-07-05 00:13:34,012][45457] Num frames 2100... +[2024-07-05 00:13:34,064][45457] Avg episode rewards: #0: 56.999, true rewards: #0: 21.000 +[2024-07-05 00:13:34,065][45457] Avg episode reward: 56.999, avg true_objective: 21.000 +[2024-07-05 00:13:34,162][45457] Num frames 2200... +[2024-07-05 00:13:34,259][45457] Num frames 2300... +[2024-07-05 00:13:34,365][45457] Avg episode rewards: #0: 29.779, true rewards: #0: 11.780 +[2024-07-05 00:13:34,367][45457] Avg episode reward: 29.779, avg true_objective: 11.780 +[2024-07-05 00:13:34,410][45457] Num frames 2400... +[2024-07-05 00:13:34,508][45457] Num frames 2500... +[2024-07-05 00:13:34,604][45457] Num frames 2600... +[2024-07-05 00:13:34,704][45457] Num frames 2700... +[2024-07-05 00:13:34,806][45457] Num frames 2800... +[2024-07-05 00:13:34,921][45457] Num frames 2900... +[2024-07-05 00:13:35,035][45457] Num frames 3000... +[2024-07-05 00:13:35,132][45457] Num frames 3100... +[2024-07-05 00:13:35,228][45457] Num frames 3200... +[2024-07-05 00:13:35,323][45457] Num frames 3300... +[2024-07-05 00:13:35,422][45457] Num frames 3400... +[2024-07-05 00:13:35,513][45457] Num frames 3500... +[2024-07-05 00:13:35,607][45457] Num frames 3600... +[2024-07-05 00:13:35,701][45457] Num frames 3700... +[2024-07-05 00:13:35,752][45457] Avg episode rewards: #0: 29.000, true rewards: #0: 12.333 +[2024-07-05 00:13:35,753][45457] Avg episode reward: 29.000, avg true_objective: 12.333 +[2024-07-05 00:13:35,847][45457] Num frames 3800... +[2024-07-05 00:13:35,940][45457] Num frames 3900... +[2024-07-05 00:13:36,033][45457] Num frames 4000... +[2024-07-05 00:13:36,125][45457] Num frames 4100... +[2024-07-05 00:13:36,217][45457] Num frames 4200... +[2024-07-05 00:13:36,312][45457] Num frames 4300... +[2024-07-05 00:13:36,405][45457] Num frames 4400... +[2024-07-05 00:13:36,500][45457] Num frames 4500... +[2024-07-05 00:13:36,596][45457] Num frames 4600... +[2024-07-05 00:13:36,690][45457] Num frames 4700... +[2024-07-05 00:13:36,784][45457] Num frames 4800... +[2024-07-05 00:13:36,858][45457] Avg episode rewards: #0: 28.800, true rewards: #0: 12.050 +[2024-07-05 00:13:36,859][45457] Avg episode reward: 28.800, avg true_objective: 12.050 +[2024-07-05 00:13:36,936][45457] Num frames 4900... +[2024-07-05 00:13:37,030][45457] Num frames 5000... +[2024-07-05 00:13:37,126][45457] Num frames 5100... +[2024-07-05 00:13:37,224][45457] Num frames 5200... +[2024-07-05 00:13:37,318][45457] Num frames 5300... +[2024-07-05 00:13:37,414][45457] Num frames 5400... +[2024-07-05 00:13:37,509][45457] Num frames 5500... +[2024-07-05 00:13:37,606][45457] Num frames 5600... +[2024-07-05 00:13:37,703][45457] Num frames 5700... +[2024-07-05 00:13:37,800][45457] Num frames 5800... +[2024-07-05 00:13:37,895][45457] Num frames 5900... +[2024-07-05 00:13:37,990][45457] Num frames 6000... +[2024-07-05 00:13:38,085][45457] Num frames 6100... +[2024-07-05 00:13:38,180][45457] Num frames 6200... +[2024-07-05 00:13:38,275][45457] Num frames 6300... +[2024-07-05 00:13:38,369][45457] Num frames 6400... +[2024-07-05 00:13:38,463][45457] Num frames 6500... +[2024-07-05 00:13:38,559][45457] Num frames 6600... +[2024-07-05 00:13:38,654][45457] Num frames 6700... +[2024-07-05 00:13:38,751][45457] Num frames 6800... +[2024-07-05 00:13:38,846][45457] Num frames 6900... +[2024-07-05 00:13:38,920][45457] Avg episode rewards: #0: 34.240, true rewards: #0: 13.840 +[2024-07-05 00:13:38,921][45457] Avg episode reward: 34.240, avg true_objective: 13.840 +[2024-07-05 00:13:38,998][45457] Num frames 7000... +[2024-07-05 00:13:39,093][45457] Num frames 7100... +[2024-07-05 00:13:39,189][45457] Num frames 7200... +[2024-07-05 00:13:39,284][45457] Num frames 7300... +[2024-07-05 00:13:39,377][45457] Num frames 7400... +[2024-07-05 00:13:39,474][45457] Num frames 7500... +[2024-07-05 00:13:39,575][45457] Num frames 7600... +[2024-07-05 00:13:39,672][45457] Num frames 7700... +[2024-07-05 00:13:39,768][45457] Num frames 7800... +[2024-07-05 00:13:39,864][45457] Num frames 7900... +[2024-07-05 00:13:39,960][45457] Num frames 8000... +[2024-07-05 00:13:40,057][45457] Num frames 8100... +[2024-07-05 00:13:40,157][45457] Num frames 8200... +[2024-07-05 00:13:40,255][45457] Num frames 8300... +[2024-07-05 00:13:40,352][45457] Num frames 8400... +[2024-07-05 00:13:40,450][45457] Num frames 8500... +[2024-07-05 00:13:40,548][45457] Num frames 8600... +[2024-07-05 00:13:40,697][45457] Avg episode rewards: #0: 35.831, true rewards: #0: 14.498 +[2024-07-05 00:13:40,698][45457] Avg episode reward: 35.831, avg true_objective: 14.498 +[2024-07-05 00:13:40,699][45457] Num frames 8700... +[2024-07-05 00:13:40,791][45457] Num frames 8800... +[2024-07-05 00:13:40,883][45457] Num frames 8900... +[2024-07-05 00:13:40,978][45457] Num frames 9000... +[2024-07-05 00:13:41,073][45457] Num frames 9100... +[2024-07-05 00:13:41,163][45457] Num frames 9200... +[2024-07-05 00:13:41,253][45457] Num frames 9300... +[2024-07-05 00:13:41,310][45457] Avg episode rewards: #0: 32.575, true rewards: #0: 13.290 +[2024-07-05 00:13:41,311][45457] Avg episode reward: 32.575, avg true_objective: 13.290 +[2024-07-05 00:13:41,399][45457] Num frames 9400... +[2024-07-05 00:13:41,489][45457] Num frames 9500... +[2024-07-05 00:13:41,583][45457] Num frames 9600... +[2024-07-05 00:13:41,677][45457] Num frames 9700... +[2024-07-05 00:13:41,771][45457] Num frames 9800... +[2024-07-05 00:13:41,864][45457] Num frames 9900... +[2024-07-05 00:13:41,960][45457] Num frames 10000... +[2024-07-05 00:13:42,056][45457] Num frames 10100... +[2024-07-05 00:13:42,152][45457] Num frames 10200... +[2024-07-05 00:13:42,243][45457] Num frames 10300... +[2024-07-05 00:13:42,335][45457] Num frames 10400... +[2024-07-05 00:13:42,432][45457] Num frames 10500... +[2024-07-05 00:13:42,523][45457] Num frames 10600... +[2024-07-05 00:13:42,615][45457] Num frames 10700... +[2024-07-05 00:13:42,736][45457] Avg episode rewards: #0: 33.467, true rewards: #0: 13.468 +[2024-07-05 00:13:42,738][45457] Avg episode reward: 33.467, avg true_objective: 13.468 +[2024-07-05 00:13:42,764][45457] Num frames 10800... +[2024-07-05 00:13:42,857][45457] Num frames 10900... +[2024-07-05 00:13:42,951][45457] Num frames 11000... +[2024-07-05 00:13:43,042][45457] Num frames 11100... +[2024-07-05 00:13:43,135][45457] Num frames 11200... +[2024-07-05 00:13:43,230][45457] Num frames 11300... +[2024-07-05 00:13:43,321][45457] Num frames 11400... +[2024-07-05 00:13:43,421][45457] Num frames 11500... +[2024-07-05 00:13:43,520][45457] Num frames 11600... +[2024-07-05 00:13:43,614][45457] Num frames 11700... +[2024-07-05 00:13:43,717][45457] Num frames 11800... +[2024-07-05 00:13:43,812][45457] Num frames 11900... +[2024-07-05 00:13:43,950][45457] Avg episode rewards: #0: 32.433, true rewards: #0: 13.322 +[2024-07-05 00:13:43,951][45457] Avg episode reward: 32.433, avg true_objective: 13.322 +[2024-07-05 00:13:43,961][45457] Num frames 12000... +[2024-07-05 00:13:44,054][45457] Num frames 12100... +[2024-07-05 00:13:44,148][45457] Num frames 12200... +[2024-07-05 00:13:44,245][45457] Num frames 12300... +[2024-07-05 00:13:44,341][45457] Num frames 12400... +[2024-07-05 00:13:44,436][45457] Num frames 12500... +[2024-07-05 00:13:44,554][45457] Avg episode rewards: #0: 30.366, true rewards: #0: 12.566 +[2024-07-05 00:13:44,555][45457] Avg episode reward: 30.366, avg true_objective: 12.566 +[2024-07-05 00:13:59,152][45457] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/replay.mp4! +[2024-07-05 00:21:35,280][45457] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 00:21:35,281][45457] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 00:21:35,281][45457] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 00:21:35,282][45457] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 00:21:35,282][45457] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:21:35,283][45457] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 00:21:35,283][45457] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-07-05 00:21:35,283][45457] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 00:21:35,284][45457] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-07-05 00:21:35,284][45457] Adding new argument 'hf_repository'='ra9hu/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-07-05 00:21:35,285][45457] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 00:21:35,285][45457] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 00:21:35,285][45457] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 00:21:35,286][45457] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 00:21:35,286][45457] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 00:21:35,307][45457] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 00:21:35,309][45457] RunningMeanStd input shape: (1,) +[2024-07-05 00:21:35,319][45457] Num input channels: 3 +[2024-07-05 00:21:35,328][45457] Convolutional layer output size: 4608 +[2024-07-05 00:21:35,342][45457] Policy head output size: 512 +[2024-07-05 00:21:35,419][45457] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth... +[2024-07-05 00:21:36,108][45457] Num frames 100... +[2024-07-05 00:21:36,216][45457] Num frames 200... +[2024-07-05 00:21:36,317][45457] Num frames 300... +[2024-07-05 00:21:36,421][45457] Num frames 400... +[2024-07-05 00:21:36,518][45457] Num frames 500... +[2024-07-05 00:21:36,607][45457] Num frames 600... +[2024-07-05 00:21:36,701][45457] Num frames 700... +[2024-07-05 00:21:36,795][45457] Num frames 800... +[2024-07-05 00:21:36,899][45457] Num frames 900... +[2024-07-05 00:21:36,985][45457] Num frames 1000... +[2024-07-05 00:21:37,075][45457] Num frames 1100... +[2024-07-05 00:21:37,167][45457] Num frames 1200... +[2024-07-05 00:21:37,262][45457] Num frames 1300... +[2024-07-05 00:21:37,360][45457] Num frames 1400... +[2024-07-05 00:21:37,457][45457] Num frames 1500... +[2024-07-05 00:21:37,550][45457] Num frames 1600... +[2024-07-05 00:21:37,645][45457] Num frames 1700... +[2024-07-05 00:21:37,738][45457] Num frames 1800... +[2024-07-05 00:21:37,816][45457] Avg episode rewards: #0: 46.239, true rewards: #0: 18.240 +[2024-07-05 00:21:37,817][45457] Avg episode reward: 46.239, avg true_objective: 18.240 +[2024-07-05 00:21:37,886][45457] Num frames 1900... +[2024-07-05 00:21:37,980][45457] Num frames 2000... +[2024-07-05 00:21:38,072][45457] Num frames 2100... +[2024-07-05 00:21:38,167][45457] Num frames 2200... +[2024-07-05 00:21:38,262][45457] Num frames 2300... +[2024-07-05 00:21:38,358][45457] Num frames 2400... +[2024-07-05 00:21:38,410][45457] Avg episode rewards: #0: 28.000, true rewards: #0: 12.000 +[2024-07-05 00:21:38,411][45457] Avg episode reward: 28.000, avg true_objective: 12.000 +[2024-07-05 00:21:38,505][45457] Num frames 2500... +[2024-07-05 00:21:38,597][45457] Num frames 2600... +[2024-07-05 00:21:38,690][45457] Num frames 2700... +[2024-07-05 00:21:38,786][45457] Num frames 2800... +[2024-07-05 00:21:38,878][45457] Num frames 2900... +[2024-07-05 00:21:38,975][45457] Num frames 3000... +[2024-07-05 00:21:39,072][45457] Num frames 3100... +[2024-07-05 00:21:39,169][45457] Num frames 3200... +[2024-07-05 00:21:39,314][45457] Avg episode rewards: #0: 23.986, true rewards: #0: 10.987 +[2024-07-05 00:21:39,316][45457] Avg episode reward: 23.986, avg true_objective: 10.987 +[2024-07-05 00:21:39,321][45457] Num frames 3300... +[2024-07-05 00:21:39,416][45457] Num frames 3400... +[2024-07-05 00:21:39,508][45457] Num frames 3500... +[2024-07-05 00:21:39,604][45457] Num frames 3600... +[2024-07-05 00:21:39,700][45457] Num frames 3700... +[2024-07-05 00:21:39,814][45457] Num frames 3800... +[2024-07-05 00:21:39,934][45457] Num frames 3900... +[2024-07-05 00:21:40,033][45457] Num frames 4000... +[2024-07-05 00:21:40,124][45457] Num frames 4100... +[2024-07-05 00:21:40,215][45457] Num frames 4200... +[2024-07-05 00:21:40,305][45457] Num frames 4300... +[2024-07-05 00:21:40,399][45457] Num frames 4400... +[2024-07-05 00:21:40,492][45457] Num frames 4500... +[2024-07-05 00:21:40,584][45457] Num frames 4600... +[2024-07-05 00:21:40,676][45457] Num frames 4700... +[2024-07-05 00:21:40,770][45457] Num frames 4800... +[2024-07-05 00:21:40,862][45457] Num frames 4900... +[2024-07-05 00:21:40,955][45457] Num frames 5000... +[2024-07-05 00:21:41,047][45457] Num frames 5100... +[2024-07-05 00:21:41,140][45457] Num frames 5200... +[2024-07-05 00:21:41,232][45457] Num frames 5300... +[2024-07-05 00:21:41,374][45457] Avg episode rewards: #0: 31.490, true rewards: #0: 13.490 +[2024-07-05 00:21:41,375][45457] Avg episode reward: 31.490, avg true_objective: 13.490 +[2024-07-05 00:21:41,379][45457] Num frames 5400... +[2024-07-05 00:21:41,471][45457] Num frames 5500... +[2024-07-05 00:21:41,564][45457] Num frames 5600... +[2024-07-05 00:21:41,657][45457] Num frames 5700... +[2024-07-05 00:21:41,751][45457] Num frames 5800... +[2024-07-05 00:21:41,840][45457] Num frames 5900... +[2024-07-05 00:21:41,930][45457] Num frames 6000... +[2024-07-05 00:21:41,989][45457] Avg episode rewards: #0: 27.008, true rewards: #0: 12.008 +[2024-07-05 00:21:41,990][45457] Avg episode reward: 27.008, avg true_objective: 12.008 +[2024-07-05 00:21:42,078][45457] Num frames 6100... +[2024-07-05 00:21:42,169][45457] Num frames 6200... +[2024-07-05 00:21:42,262][45457] Num frames 6300... +[2024-07-05 00:21:42,355][45457] Num frames 6400... +[2024-07-05 00:21:42,456][45457] Avg episode rewards: #0: 23.420, true rewards: #0: 10.753 +[2024-07-05 00:21:42,456][45457] Avg episode reward: 23.420, avg true_objective: 10.753 +[2024-07-05 00:21:42,501][45457] Num frames 6500... +[2024-07-05 00:21:42,594][45457] Num frames 6600... +[2024-07-05 00:21:42,686][45457] Num frames 6700... +[2024-07-05 00:21:42,778][45457] Num frames 6800... +[2024-07-05 00:21:42,867][45457] Num frames 6900... +[2024-07-05 00:21:42,960][45457] Num frames 7000... +[2024-07-05 00:21:43,050][45457] Num frames 7100... +[2024-07-05 00:21:43,140][45457] Num frames 7200... +[2024-07-05 00:21:43,232][45457] Num frames 7300... +[2024-07-05 00:21:43,332][45457] Num frames 7400... +[2024-07-05 00:21:43,423][45457] Num frames 7500... +[2024-07-05 00:21:43,517][45457] Num frames 7600... +[2024-07-05 00:21:43,609][45457] Num frames 7700... +[2024-07-05 00:21:43,702][45457] Num frames 7800... +[2024-07-05 00:21:43,794][45457] Num frames 7900... +[2024-07-05 00:21:43,879][45457] Num frames 8000... +[2024-07-05 00:21:43,958][45457] Num frames 8100... +[2024-07-05 00:21:44,040][45457] Num frames 8200... +[2024-07-05 00:21:44,119][45457] Num frames 8300... +[2024-07-05 00:21:44,197][45457] Num frames 8400... +[2024-07-05 00:21:44,277][45457] Num frames 8500... +[2024-07-05 00:21:44,371][45457] Avg episode rewards: #0: 28.788, true rewards: #0: 12.217 +[2024-07-05 00:21:44,372][45457] Avg episode reward: 28.788, avg true_objective: 12.217 +[2024-07-05 00:21:44,408][45457] Num frames 8600... +[2024-07-05 00:21:44,479][45457] Num frames 8700... +[2024-07-05 00:21:44,561][45457] Num frames 8800... +[2024-07-05 00:21:44,641][45457] Num frames 8900... +[2024-07-05 00:21:44,727][45457] Num frames 9000... +[2024-07-05 00:21:44,813][45457] Num frames 9100... +[2024-07-05 00:21:44,897][45457] Num frames 9200... +[2024-07-05 00:21:44,987][45457] Num frames 9300... +[2024-07-05 00:21:45,072][45457] Num frames 9400... +[2024-07-05 00:21:45,157][45457] Num frames 9500... +[2024-07-05 00:21:45,242][45457] Num frames 9600... +[2024-07-05 00:21:45,328][45457] Num frames 9700... +[2024-07-05 00:21:45,412][45457] Num frames 9800... +[2024-07-05 00:21:45,497][45457] Num frames 9900... +[2024-07-05 00:21:45,583][45457] Num frames 10000... +[2024-07-05 00:21:45,668][45457] Num frames 10100... +[2024-07-05 00:21:45,754][45457] Num frames 10200... +[2024-07-05 00:21:45,838][45457] Num frames 10300... +[2024-07-05 00:21:45,923][45457] Num frames 10400... +[2024-07-05 00:21:46,006][45457] Num frames 10500... +[2024-07-05 00:21:46,092][45457] Num frames 10600... +[2024-07-05 00:21:46,191][45457] Avg episode rewards: #0: 32.190, true rewards: #0: 13.315 +[2024-07-05 00:21:46,193][45457] Avg episode reward: 32.190, avg true_objective: 13.315 +[2024-07-05 00:21:46,234][45457] Num frames 10700... +[2024-07-05 00:21:46,317][45457] Num frames 10800... +[2024-07-05 00:21:46,398][45457] Num frames 10900... +[2024-07-05 00:21:46,484][45457] Num frames 11000... +[2024-07-05 00:21:46,566][45457] Num frames 11100... +[2024-07-05 00:21:46,648][45457] Num frames 11200... +[2024-07-05 00:21:46,731][45457] Num frames 11300... +[2024-07-05 00:21:46,817][45457] Num frames 11400... +[2024-07-05 00:21:46,903][45457] Num frames 11500... +[2024-07-05 00:21:46,996][45457] Num frames 11600... +[2024-07-05 00:21:47,081][45457] Num frames 11700... +[2024-07-05 00:21:47,168][45457] Num frames 11800... +[2024-07-05 00:21:47,263][45457] Num frames 11900... +[2024-07-05 00:21:47,352][45457] Num frames 12000... +[2024-07-05 00:21:47,438][45457] Num frames 12100... +[2024-07-05 00:21:47,526][45457] Num frames 12200... +[2024-07-05 00:21:47,613][45457] Num frames 12300... +[2024-07-05 00:21:47,701][45457] Num frames 12400... +[2024-07-05 00:21:47,789][45457] Num frames 12500... +[2024-07-05 00:21:47,876][45457] Num frames 12600... +[2024-07-05 00:21:47,965][45457] Num frames 12700... +[2024-07-05 00:21:48,065][45457] Avg episode rewards: #0: 33.946, true rewards: #0: 14.169 +[2024-07-05 00:21:48,068][45457] Avg episode reward: 33.946, avg true_objective: 14.169 +[2024-07-05 00:21:48,111][45457] Num frames 12800... +[2024-07-05 00:21:48,196][45457] Num frames 12900... +[2024-07-05 00:21:48,282][45457] Num frames 13000... +[2024-07-05 00:21:48,369][45457] Num frames 13100... +[2024-07-05 00:21:48,456][45457] Num frames 13200... +[2024-07-05 00:21:48,564][45457] Avg episode rewards: #0: 31.264, true rewards: #0: 13.264 +[2024-07-05 00:21:48,566][45457] Avg episode reward: 31.264, avg true_objective: 13.264 +[2024-07-05 00:22:03,968][45457] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/replay.mp4! +[2024-07-05 00:22:47,229][45457] The model has been pushed to https://huggingface.co/ra9hu/rl_course_vizdoom_health_gathering_supreme +[2024-07-05 00:32:07,320][45457] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json +[2024-07-05 00:32:07,321][45457] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 00:32:07,322][45457] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 00:32:07,322][45457] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 00:32:07,323][45457] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:32:07,324][45457] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 00:32:07,324][45457] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:32:07,324][45457] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 00:32:07,325][45457] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-07-05 00:32:07,325][45457] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-07-05 00:32:07,325][45457] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 00:32:07,325][45457] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 00:32:07,326][45457] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 00:32:07,326][45457] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 00:32:07,326][45457] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 00:32:07,345][45457] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 00:32:07,346][45457] RunningMeanStd input shape: (1,) +[2024-07-05 00:32:07,357][45457] ConvEncoder: input_channels=3 +[2024-07-05 00:32:07,480][45457] Conv encoder output size: 512 +[2024-07-05 00:32:07,482][45457] Policy head output size: 512 +[2024-07-05 00:32:07,507][45457] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth... +[2024-07-05 00:33:54,772][45457] Environment doom_basic already registered, overwriting... +[2024-07-05 00:33:54,774][45457] Environment doom_two_colors_easy already registered, overwriting... +[2024-07-05 00:33:54,774][45457] Environment doom_two_colors_hard already registered, overwriting... +[2024-07-05 00:33:54,775][45457] Environment doom_dm already registered, overwriting... +[2024-07-05 00:33:54,775][45457] Environment doom_dwango5 already registered, overwriting... +[2024-07-05 00:33:54,775][45457] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-07-05 00:33:54,776][45457] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-07-05 00:33:54,776][45457] Environment doom_my_way_home already registered, overwriting... +[2024-07-05 00:33:54,776][45457] Environment doom_deadly_corridor already registered, overwriting... +[2024-07-05 00:33:54,776][45457] Environment doom_defend_the_center already registered, overwriting... +[2024-07-05 00:33:54,777][45457] Environment doom_defend_the_line already registered, overwriting... +[2024-07-05 00:33:54,777][45457] Environment doom_health_gathering already registered, overwriting... +[2024-07-05 00:33:54,777][45457] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-07-05 00:33:54,778][45457] Environment doom_battle already registered, overwriting... +[2024-07-05 00:33:54,778][45457] Environment doom_battle2 already registered, overwriting... +[2024-07-05 00:33:54,779][45457] Environment doom_duel_bots already registered, overwriting... +[2024-07-05 00:33:54,779][45457] Environment doom_deathmatch_bots already registered, overwriting... +[2024-07-05 00:33:54,780][45457] Environment doom_duel already registered, overwriting... +[2024-07-05 00:33:54,780][45457] Environment doom_deathmatch_full already registered, overwriting... +[2024-07-05 00:33:54,781][45457] Environment doom_benchmark already registered, overwriting... +[2024-07-05 00:33:54,781][45457] register_encoder_factory: +[2024-07-05 00:33:54,789][45457] Saved parameter configuration for experiment default_experiment not found! +[2024-07-05 00:33:54,790][45457] Starting experiment from scratch! +[2024-07-05 00:33:54,795][45457] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! +[2024-07-05 00:33:54,797][45457] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... +[2024-07-05 00:33:54,797][45457] Weights and Biases integration disabled +[2024-07-05 00:33:54,799][45457] Environment var CUDA_VISIBLE_DEVICES is 0 -[2024-07-05 12:14:44,642][25826] Starting experiment with the following configuration: +[2024-07-05 00:33:58,058][45457] Automatically setting recurrence to 32 +[2024-07-05 00:33:58,059][45457] Starting experiment with the following configuration: help=False algo=APPO env=doom_health_gathering_supreme @@ -7710,7 +1098,7 @@ experiment=default_experiment train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir restart_behavior=resume device=gpu -seed=200 +seed=None num_policies=1 async_rl=True serial_mode=False @@ -7719,9 +1107,9 @@ num_batches_to_accumulate=2 worker_num_splits=2 policy_workers_per_policy=1 max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 num_batches_per_epoch=1 num_epochs=1 rollout=32 @@ -7769,7 +1157,7 @@ stats_avg=100 summaries_use_frameskip=True heartbeat_interval=20 heartbeat_reporting_interval=600 -train_for_env_steps=200000000 +train_for_env_steps=20000000 train_for_seconds=10000000000 save_every_sec=120 keep_checkpoints=2 @@ -7833,71 +1221,620 @@ command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_ cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} git_hash=unknown git_repo_name=not a git repository -[2024-07-05 12:14:44,643][25826] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 12:14:44,644][25826] Rollout worker 0 uses device cpu -[2024-07-05 12:14:44,645][25826] Rollout worker 1 uses device cpu -[2024-07-05 12:14:44,645][25826] Rollout worker 2 uses device cpu -[2024-07-05 12:14:44,645][25826] Rollout worker 3 uses device cpu -[2024-07-05 12:14:44,646][25826] Rollout worker 4 uses device cpu -[2024-07-05 12:14:44,646][25826] Rollout worker 5 uses device cpu -[2024-07-05 12:14:44,646][25826] Rollout worker 6 uses device cpu -[2024-07-05 12:14:44,647][25826] Rollout worker 7 uses device cpu -[2024-07-05 12:14:44,647][25826] Rollout worker 8 uses device cpu -[2024-07-05 12:14:44,648][25826] Rollout worker 9 uses device cpu -[2024-07-05 12:14:44,648][25826] Rollout worker 10 uses device cpu -[2024-07-05 12:14:44,649][25826] Rollout worker 11 uses device cpu -[2024-07-05 12:14:44,649][25826] Rollout worker 12 uses device cpu -[2024-07-05 12:14:44,649][25826] Rollout worker 13 uses device cpu -[2024-07-05 12:14:44,650][25826] Rollout worker 14 uses device cpu -[2024-07-05 12:14:44,650][25826] Rollout worker 15 uses device cpu -[2024-07-05 12:14:44,749][25826] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:14:44,750][25826] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 12:14:44,799][25826] Starting all processes... -[2024-07-05 12:14:44,799][25826] Starting process learner_proc0 -[2024-07-05 12:14:44,849][25826] Starting all processes... -[2024-07-05 12:14:44,853][25826] Starting process inference_proc0-0 -[2024-07-05 12:14:44,854][25826] Starting process rollout_proc0 -[2024-07-05 12:14:44,854][25826] Starting process rollout_proc1 -[2024-07-05 12:14:44,855][25826] Starting process rollout_proc2 -[2024-07-05 12:14:44,856][25826] Starting process rollout_proc3 -[2024-07-05 12:14:44,856][25826] Starting process rollout_proc4 -[2024-07-05 12:14:44,856][25826] Starting process rollout_proc5 -[2024-07-05 12:14:44,858][25826] Starting process rollout_proc6 -[2024-07-05 12:14:44,859][25826] Starting process rollout_proc7 -[2024-07-05 12:14:44,861][25826] Starting process rollout_proc8 -[2024-07-05 12:14:44,862][25826] Starting process rollout_proc9 -[2024-07-05 12:14:44,862][25826] Starting process rollout_proc10 -[2024-07-05 12:14:44,862][25826] Starting process rollout_proc11 -[2024-07-05 12:14:44,863][25826] Starting process rollout_proc12 -[2024-07-05 12:14:44,863][25826] Starting process rollout_proc13 -[2024-07-05 12:14:44,866][25826] Starting process rollout_proc14 -[2024-07-05 12:14:44,886][25826] Starting process rollout_proc15 -[2024-07-05 12:14:49,176][51877] Worker 3 uses CPU cores [3] -[2024-07-05 12:14:49,184][51875] Worker 2 uses CPU cores [2] -[2024-07-05 12:14:49,185][51853] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:14:49,185][51853] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 12:14:49,200][51903] Worker 15 uses CPU cores [15] -[2024-07-05 12:14:49,216][51904] Worker 14 uses CPU cores [14] -[2024-07-05 12:14:49,256][51897] Worker 8 uses CPU cores [8] -[2024-07-05 12:14:49,292][51853] Num visible devices: 1 -[2024-07-05 12:14:49,320][51876] Worker 1 uses CPU cores [1] -[2024-07-05 12:14:49,324][51853] Setting fixed seed 200 -[2024-07-05 12:14:49,327][51853] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:14:49,328][51853] Initializing actor-critic model on device cuda:0 -[2024-07-05 12:14:49,328][51853] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:14:49,331][51853] RunningMeanStd input shape: (1,) -[2024-07-05 12:14:49,345][51853] ConvEncoder: input_channels=3 -[2024-07-05 12:14:49,400][51874] Worker 0 uses CPU cores [0] -[2024-07-05 12:14:49,417][51873] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:14:49,418][51873] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 12:14:49,424][51899] Worker 10 uses CPU cores [10] -[2024-07-05 12:14:49,513][51853] Conv encoder output size: 512 -[2024-07-05 12:14:49,513][51853] Policy head output size: 512 -[2024-07-05 12:14:49,519][51878] Worker 4 uses CPU cores [4] -[2024-07-05 12:14:49,518][51873] Num visible devices: 1 -[2024-07-05 12:14:49,528][51853] Created Actor Critic model with architecture: -[2024-07-05 12:14:49,528][51896] Worker 6 uses CPU cores [6] -[2024-07-05 12:14:49,528][51853] ActorCriticSharedWeights( +[2024-07-05 00:33:58,060][45457] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... +[2024-07-05 00:33:58,061][45457] Rollout worker 0 uses device cpu +[2024-07-05 00:33:58,061][45457] Rollout worker 1 uses device cpu +[2024-07-05 00:33:58,062][45457] Rollout worker 2 uses device cpu +[2024-07-05 00:33:58,062][45457] Rollout worker 3 uses device cpu +[2024-07-05 00:33:58,062][45457] Rollout worker 4 uses device cpu +[2024-07-05 00:33:58,063][45457] Rollout worker 5 uses device cpu +[2024-07-05 00:33:58,063][45457] Rollout worker 6 uses device cpu +[2024-07-05 00:33:58,063][45457] Rollout worker 7 uses device cpu +[2024-07-05 00:33:58,093][45457] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 00:33:58,094][45457] InferenceWorker_p0-w0: min num requests: 2 +[2024-07-05 00:33:58,121][45457] Starting all processes... +[2024-07-05 00:33:58,122][45457] Starting process learner_proc0 +[2024-07-05 00:33:58,171][45457] Starting all processes... +[2024-07-05 00:33:58,174][45457] Starting process inference_proc0-0 +[2024-07-05 00:33:58,174][45457] Starting process rollout_proc0 +[2024-07-05 00:33:58,175][45457] Starting process rollout_proc1 +[2024-07-05 00:33:58,175][45457] Starting process rollout_proc2 +[2024-07-05 00:33:58,175][45457] Starting process rollout_proc3 +[2024-07-05 00:33:58,177][45457] Starting process rollout_proc4 +[2024-07-05 00:33:58,178][45457] Starting process rollout_proc5 +[2024-07-05 00:33:58,179][45457] Starting process rollout_proc6 +[2024-07-05 00:33:58,179][45457] Starting process rollout_proc7 +[2024-07-05 00:34:02,272][45457] Inference worker 0-0 is ready! +[2024-07-05 00:34:02,273][45457] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 00:34:04,800][45457] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 00:34:04,801][45457] Avg episode reward: [(0, '1.869')] +[2024-07-05 00:34:09,800][45457] Fps is (10 sec: 18022.6, 60 sec: 18022.6, 300 sec: 18022.6). Total num frames: 90112. Throughput: 0: 3338.0. Samples: 16690. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-07-05 00:34:09,801][45457] Avg episode reward: [(0, '4.463')] +[2024-07-05 00:34:14,800][45457] Fps is (10 sec: 20070.5, 60 sec: 20070.5, 300 sec: 20070.5). Total num frames: 200704. Throughput: 0: 4792.2. Samples: 47922. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:34:14,801][45457] Avg episode reward: [(0, '4.281')] +[2024-07-05 00:34:18,085][45457] Heartbeat connected on Batcher_0 +[2024-07-05 00:34:18,089][45457] Heartbeat connected on LearnerWorker_p0 +[2024-07-05 00:34:18,097][45457] Heartbeat connected on InferenceWorker_p0-w0 +[2024-07-05 00:34:18,099][45457] Heartbeat connected on RolloutWorker_w0 +[2024-07-05 00:34:18,102][45457] Heartbeat connected on RolloutWorker_w1 +[2024-07-05 00:34:18,104][45457] Heartbeat connected on RolloutWorker_w2 +[2024-07-05 00:34:18,108][45457] Heartbeat connected on RolloutWorker_w3 +[2024-07-05 00:34:18,112][45457] Heartbeat connected on RolloutWorker_w4 +[2024-07-05 00:34:18,115][45457] Heartbeat connected on RolloutWorker_w5 +[2024-07-05 00:34:18,119][45457] Heartbeat connected on RolloutWorker_w6 +[2024-07-05 00:34:18,126][45457] Heartbeat connected on RolloutWorker_w7 +[2024-07-05 00:34:19,800][45457] Fps is (10 sec: 21707.3, 60 sec: 20479.1, 300 sec: 20479.1). Total num frames: 307200. Throughput: 0: 4328.5. Samples: 64930. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-07-05 00:34:19,804][45457] Avg episode reward: [(0, '4.474')] +[2024-07-05 00:34:24,800][45457] Fps is (10 sec: 21708.6, 60 sec: 20889.5, 300 sec: 20889.5). Total num frames: 417792. Throughput: 0: 4825.5. Samples: 96510. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-07-05 00:34:24,801][45457] Avg episode reward: [(0, '4.698')] +[2024-07-05 00:34:29,801][45457] Fps is (10 sec: 21298.7, 60 sec: 20807.0, 300 sec: 20807.0). Total num frames: 520192. Throughput: 0: 5156.6. Samples: 128920. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:34:29,806][45457] Avg episode reward: [(0, '4.553')] +[2024-07-05 00:34:34,800][45457] Fps is (10 sec: 21709.1, 60 sec: 21162.7, 300 sec: 21162.7). Total num frames: 634880. Throughput: 0: 4842.4. Samples: 145272. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:34:34,800][45457] Avg episode reward: [(0, '4.488')] +[2024-07-05 00:34:39,800][45457] Fps is (10 sec: 22530.0, 60 sec: 21299.2, 300 sec: 21299.2). Total num frames: 745472. Throughput: 0: 5105.6. Samples: 178696. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:34:39,801][45457] Avg episode reward: [(0, '4.690')] +[2024-07-05 00:34:44,800][45457] Fps is (10 sec: 22527.9, 60 sec: 21504.0, 300 sec: 21504.0). Total num frames: 860160. Throughput: 0: 5316.6. Samples: 212664. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:34:44,800][45457] Avg episode reward: [(0, '4.569')] +[2024-07-05 00:34:49,800][45457] Fps is (10 sec: 22528.1, 60 sec: 21572.3, 300 sec: 21572.3). Total num frames: 970752. Throughput: 0: 5103.0. Samples: 229634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:34:49,800][45457] Avg episode reward: [(0, '4.602')] +[2024-07-05 00:34:54,800][45457] Fps is (10 sec: 22118.4, 60 sec: 21626.9, 300 sec: 21626.9). Total num frames: 1081344. Throughput: 0: 5466.3. Samples: 262674. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:34:54,800][45457] Avg episode reward: [(0, '4.647')] +[2024-07-05 00:34:59,800][45457] Fps is (10 sec: 21708.9, 60 sec: 21597.1, 300 sec: 21597.1). Total num frames: 1187840. Throughput: 0: 5499.1. Samples: 295380. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:34:59,800][45457] Avg episode reward: [(0, '4.902')] +[2024-07-05 00:35:04,801][45457] Fps is (10 sec: 21297.4, 60 sec: 21572.0, 300 sec: 21572.0). Total num frames: 1294336. Throughput: 0: 5484.8. Samples: 311748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:35:04,802][45457] Avg episode reward: [(0, '5.150')] +[2024-07-05 00:35:09,800][45457] Fps is (10 sec: 21708.7, 60 sec: 21913.6, 300 sec: 21614.3). Total num frames: 1404928. Throughput: 0: 5499.5. Samples: 343988. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:35:09,801][45457] Avg episode reward: [(0, '5.423')] +[2024-07-05 00:35:14,800][45457] Fps is (10 sec: 22120.3, 60 sec: 21913.6, 300 sec: 21650.3). Total num frames: 1515520. Throughput: 0: 5503.3. Samples: 376562. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:35:14,800][45457] Avg episode reward: [(0, '6.091')] +[2024-07-05 00:35:19,801][45457] Fps is (10 sec: 21706.8, 60 sec: 21913.5, 300 sec: 21626.6). Total num frames: 1622016. Throughput: 0: 5509.0. Samples: 393184. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:35:19,803][45457] Avg episode reward: [(0, '6.431')] +[2024-07-05 00:35:24,800][45457] Fps is (10 sec: 22118.4, 60 sec: 21981.9, 300 sec: 21708.8). Total num frames: 1736704. Throughput: 0: 5507.9. Samples: 426552. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:35:24,800][45457] Avg episode reward: [(0, '7.559')] +[2024-07-05 00:35:29,801][45457] Fps is (10 sec: 22118.6, 60 sec: 22050.2, 300 sec: 21684.5). Total num frames: 1843200. Throughput: 0: 5483.1. Samples: 459408. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 00:35:29,805][45457] Avg episode reward: [(0, '8.770')] +[2024-07-05 00:35:34,800][45457] Fps is (10 sec: 21708.7, 60 sec: 21981.9, 300 sec: 21708.8). Total num frames: 1953792. Throughput: 0: 5470.5. Samples: 475808. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:35:34,801][45457] Avg episode reward: [(0, '9.763')] +[2024-07-05 00:35:39,801][45457] Fps is (10 sec: 22118.2, 60 sec: 21981.5, 300 sec: 21730.2). Total num frames: 2064384. Throughput: 0: 5474.7. Samples: 509042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:35:39,807][45457] Avg episode reward: [(0, '11.270')] +[2024-07-05 00:35:44,800][45457] Fps is (10 sec: 22118.3, 60 sec: 21913.6, 300 sec: 21749.8). Total num frames: 2174976. Throughput: 0: 5482.3. Samples: 542082. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:35:44,801][45457] Avg episode reward: [(0, '10.500')] +[2024-07-05 00:35:49,800][45457] Fps is (10 sec: 22530.2, 60 sec: 21981.9, 300 sec: 21806.3). Total num frames: 2289664. Throughput: 0: 5498.8. Samples: 559190. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:35:49,801][45457] Avg episode reward: [(0, '11.387')] +[2024-07-05 00:35:54,800][45457] Fps is (10 sec: 22937.2, 60 sec: 22050.1, 300 sec: 21857.7). Total num frames: 2404352. Throughput: 0: 5542.7. Samples: 593412. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:35:54,804][45457] Avg episode reward: [(0, '13.066')] +[2024-07-05 00:35:59,800][45457] Fps is (10 sec: 22937.7, 60 sec: 22186.7, 300 sec: 21904.7). Total num frames: 2519040. Throughput: 0: 5587.8. Samples: 628014. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:35:59,800][45457] Avg episode reward: [(0, '15.449')] +[2024-07-05 00:36:04,800][45457] Fps is (10 sec: 22938.0, 60 sec: 22323.5, 300 sec: 21947.7). Total num frames: 2633728. Throughput: 0: 5602.1. Samples: 645272. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:36:04,800][45457] Avg episode reward: [(0, '16.120')] +[2024-07-05 00:36:09,800][45457] Fps is (10 sec: 22528.0, 60 sec: 22323.2, 300 sec: 21954.6). Total num frames: 2744320. Throughput: 0: 5603.2. Samples: 678698. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:36:09,801][45457] Avg episode reward: [(0, '20.173')] +[2024-07-05 00:36:14,800][45457] Fps is (10 sec: 22118.4, 60 sec: 22323.2, 300 sec: 21960.9). Total num frames: 2854912. Throughput: 0: 5606.5. Samples: 711698. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-07-05 00:36:14,801][45457] Avg episode reward: [(0, '18.417')] +[2024-07-05 00:36:19,800][45457] Fps is (10 sec: 22118.4, 60 sec: 22391.8, 300 sec: 21966.7). Total num frames: 2965504. Throughput: 0: 5612.1. Samples: 728354. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:36:19,801][45457] Avg episode reward: [(0, '18.290')] +[2024-07-05 00:36:24,800][45457] Fps is (10 sec: 22528.0, 60 sec: 22391.5, 300 sec: 22001.4). Total num frames: 3080192. Throughput: 0: 5632.3. Samples: 762492. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:36:24,801][45457] Avg episode reward: [(0, '17.796')] +[2024-07-05 00:36:29,800][45457] Fps is (10 sec: 22937.1, 60 sec: 22528.3, 300 sec: 22033.6). Total num frames: 3194880. Throughput: 0: 5666.9. Samples: 797094. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:36:29,801][45457] Avg episode reward: [(0, '18.992')] +[2024-07-05 00:36:34,800][45457] Fps is (10 sec: 22527.2, 60 sec: 22527.9, 300 sec: 22036.4). Total num frames: 3305472. Throughput: 0: 5657.2. Samples: 813768. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:36:34,801][45457] Avg episode reward: [(0, '22.368')] +[2024-07-05 00:36:39,800][45457] Fps is (10 sec: 22528.1, 60 sec: 22596.6, 300 sec: 22065.5). Total num frames: 3420160. Throughput: 0: 5643.2. Samples: 847358. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-07-05 00:36:39,801][45457] Avg episode reward: [(0, '19.952')] +[2024-07-05 00:36:44,800][45457] Fps is (10 sec: 22938.4, 60 sec: 22664.5, 300 sec: 22092.8). Total num frames: 3534848. Throughput: 0: 5633.3. Samples: 881512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 00:36:44,800][45457] Avg episode reward: [(0, '17.793')] +[2024-07-05 00:36:49,800][45457] Fps is (10 sec: 24576.3, 60 sec: 22937.6, 300 sec: 22217.7). Total num frames: 3665920. Throughput: 0: 5646.3. Samples: 899356. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:36:49,801][45457] Avg episode reward: [(0, '22.511')] +[2024-07-05 00:36:54,800][45457] Fps is (10 sec: 27443.2, 60 sec: 23415.5, 300 sec: 22407.5). Total num frames: 3809280. Throughput: 0: 5869.8. Samples: 942838. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:36:54,800][45457] Avg episode reward: [(0, '22.164')] +[2024-07-05 00:36:59,800][45457] Fps is (10 sec: 28671.8, 60 sec: 23893.3, 300 sec: 22586.5). Total num frames: 3952640. Throughput: 0: 6096.7. Samples: 986052. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:36:59,801][45457] Avg episode reward: [(0, '22.819')] +[2024-07-05 00:37:04,800][45457] Fps is (10 sec: 28672.2, 60 sec: 24371.2, 300 sec: 22755.6). Total num frames: 4096000. Throughput: 0: 6201.9. Samples: 1007440. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:37:04,800][45457] Avg episode reward: [(0, '22.560')] +[2024-07-05 00:37:09,800][45457] Fps is (10 sec: 28262.7, 60 sec: 24849.0, 300 sec: 22893.3). Total num frames: 4235264. Throughput: 0: 6373.3. Samples: 1049292. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 00:37:09,801][45457] Avg episode reward: [(0, '21.920')] +[2024-07-05 00:37:14,800][45457] Fps is (10 sec: 27852.6, 60 sec: 25326.9, 300 sec: 23023.8). Total num frames: 4374528. Throughput: 0: 6541.7. Samples: 1091470. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:14,801][45457] Avg episode reward: [(0, '21.734')] +[2024-07-05 00:37:19,800][45457] Fps is (10 sec: 28672.0, 60 sec: 25941.3, 300 sec: 23189.7). Total num frames: 4521984. Throughput: 0: 6646.8. Samples: 1112872. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:19,800][45457] Avg episode reward: [(0, '20.028')] +[2024-07-05 00:37:24,800][45457] Fps is (10 sec: 29081.8, 60 sec: 26419.2, 300 sec: 23326.7). Total num frames: 4665344. Throughput: 0: 6876.0. Samples: 1156778. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:24,800][45457] Avg episode reward: [(0, '22.795')] +[2024-07-05 00:37:29,800][45457] Fps is (10 sec: 28672.0, 60 sec: 26897.1, 300 sec: 23457.1). Total num frames: 4808704. Throughput: 0: 7079.4. Samples: 1200086. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:29,801][45457] Avg episode reward: [(0, '21.090')] +[2024-07-05 00:37:34,800][45457] Fps is (10 sec: 29081.5, 60 sec: 27511.6, 300 sec: 23600.8). Total num frames: 4956160. Throughput: 0: 7157.2. Samples: 1221428. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:34,800][45457] Avg episode reward: [(0, '22.262')] +[2024-07-05 00:37:39,800][45457] Fps is (10 sec: 28672.1, 60 sec: 27921.1, 300 sec: 23699.7). Total num frames: 5095424. Throughput: 0: 7146.4. Samples: 1264428. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:39,801][45457] Avg episode reward: [(0, '18.676')] +[2024-07-05 00:37:44,800][45457] Fps is (10 sec: 28262.0, 60 sec: 28398.9, 300 sec: 23812.6). Total num frames: 5238784. Throughput: 0: 7148.1. Samples: 1307718. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:37:44,802][45457] Avg episode reward: [(0, '23.004')] +[2024-07-05 00:37:49,800][45457] Fps is (10 sec: 28262.5, 60 sec: 28535.5, 300 sec: 23902.4). Total num frames: 5378048. Throughput: 0: 7120.5. Samples: 1327862. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:49,801][45457] Avg episode reward: [(0, '24.526')] +[2024-07-05 00:37:54,800][45457] Fps is (10 sec: 27443.6, 60 sec: 28398.9, 300 sec: 23970.5). Total num frames: 5513216. Throughput: 0: 7125.9. Samples: 1369958. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:54,800][45457] Avg episode reward: [(0, '21.678')] +[2024-07-05 00:37:59,800][45457] Fps is (10 sec: 27852.5, 60 sec: 28399.0, 300 sec: 24070.5). Total num frames: 5656576. Throughput: 0: 7118.3. Samples: 1411792. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:37:59,801][45457] Avg episode reward: [(0, '23.258')] +[2024-07-05 00:38:04,800][45457] Fps is (10 sec: 28262.3, 60 sec: 28330.6, 300 sec: 24149.3). Total num frames: 5795840. Throughput: 0: 7095.2. Samples: 1432156. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:38:04,800][45457] Avg episode reward: [(0, '23.293')] +[2024-07-05 00:38:09,800][45457] Fps is (10 sec: 28262.7, 60 sec: 28399.0, 300 sec: 24241.6). Total num frames: 5939200. Throughput: 0: 7071.1. Samples: 1474978. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:38:09,800][45457] Avg episode reward: [(0, '22.360')] +[2024-07-05 00:38:14,800][45457] Fps is (10 sec: 28672.1, 60 sec: 28467.2, 300 sec: 24330.2). Total num frames: 6082560. Throughput: 0: 7066.7. Samples: 1518088. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:38:14,801][45457] Avg episode reward: [(0, '23.374')] +[2024-07-05 00:38:19,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28398.9, 300 sec: 24415.4). Total num frames: 6225920. Throughput: 0: 7075.2. Samples: 1539812. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:38:19,800][45457] Avg episode reward: [(0, '22.925')] +[2024-07-05 00:38:24,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28398.9, 300 sec: 24497.2). Total num frames: 6369280. Throughput: 0: 7076.4. Samples: 1582868. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:38:24,800][45457] Avg episode reward: [(0, '22.703')] +[2024-07-05 00:38:29,800][45457] Fps is (10 sec: 28671.8, 60 sec: 28398.9, 300 sec: 24576.0). Total num frames: 6512640. Throughput: 0: 7078.2. Samples: 1626238. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:38:29,801][45457] Avg episode reward: [(0, '23.612')] +[2024-07-05 00:38:34,800][45457] Fps is (10 sec: 29081.7, 60 sec: 28398.9, 300 sec: 24667.0). Total num frames: 6660096. Throughput: 0: 7109.5. Samples: 1647788. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:38:34,800][45457] Avg episode reward: [(0, '21.244')] +[2024-07-05 00:38:39,800][45457] Fps is (10 sec: 29081.8, 60 sec: 28467.2, 300 sec: 24739.8). Total num frames: 6803456. Throughput: 0: 7140.7. Samples: 1691288. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:38:39,801][45457] Avg episode reward: [(0, '22.165')] +[2024-07-05 00:38:44,800][45457] Fps is (10 sec: 28671.8, 60 sec: 28467.2, 300 sec: 24810.1). Total num frames: 6946816. Throughput: 0: 7170.4. Samples: 1734460. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:38:44,801][45457] Avg episode reward: [(0, '23.205')] +[2024-07-05 00:38:49,800][45457] Fps is (10 sec: 29081.7, 60 sec: 28603.7, 300 sec: 24892.2). Total num frames: 7094272. Throughput: 0: 7194.6. Samples: 1755914. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:38:49,800][45457] Avg episode reward: [(0, '26.917')] +[2024-07-05 00:38:54,800][45457] Fps is (10 sec: 29081.8, 60 sec: 28740.3, 300 sec: 24957.4). Total num frames: 7237632. Throughput: 0: 7222.3. Samples: 1799982. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:38:54,800][45457] Avg episode reward: [(0, '25.928')] +[2024-07-05 00:38:59,800][45457] Fps is (10 sec: 28672.0, 60 sec: 28740.3, 300 sec: 25020.3). Total num frames: 7380992. Throughput: 0: 7216.7. Samples: 1842840. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:38:59,800][45457] Avg episode reward: [(0, '23.616')] +[2024-07-05 00:39:04,800][45457] Fps is (10 sec: 28672.0, 60 sec: 28808.5, 300 sec: 25200.8). Total num frames: 7524352. Throughput: 0: 7218.4. Samples: 1864638. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:39:04,800][45457] Avg episode reward: [(0, '21.793')] +[2024-07-05 00:39:09,800][45457] Fps is (10 sec: 28262.5, 60 sec: 28740.3, 300 sec: 25298.0). Total num frames: 7663616. Throughput: 0: 7188.4. Samples: 1906344. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:39:09,800][45457] Avg episode reward: [(0, '22.157')] +[2024-07-05 00:39:14,800][45457] Fps is (10 sec: 27853.0, 60 sec: 28672.0, 300 sec: 25409.1). Total num frames: 7802880. Throughput: 0: 7158.8. Samples: 1948382. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:39:14,800][45457] Avg episode reward: [(0, '26.325')] +[2024-07-05 00:39:19,800][45457] Fps is (10 sec: 28262.4, 60 sec: 28672.0, 300 sec: 25520.2). Total num frames: 7946240. Throughput: 0: 7160.9. Samples: 1970030. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 00:39:19,800][45457] Avg episode reward: [(0, '23.914')] +[2024-07-05 00:39:24,800][45457] Fps is (10 sec: 29081.5, 60 sec: 28740.3, 300 sec: 25673.0). Total num frames: 8093696. Throughput: 0: 7162.1. Samples: 2013584. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:39:24,800][45457] Avg episode reward: [(0, '22.325')] +[2024-07-05 00:39:29,800][45457] Fps is (10 sec: 29081.2, 60 sec: 28740.3, 300 sec: 25770.1). Total num frames: 8237056. Throughput: 0: 7152.4. Samples: 2056318. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:39:29,800][45457] Avg episode reward: [(0, '24.291')] +[2024-07-05 00:39:34,800][45457] Fps is (10 sec: 28672.1, 60 sec: 28672.0, 300 sec: 25881.2). Total num frames: 8380416. Throughput: 0: 7156.2. Samples: 2077942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:39:34,800][45457] Avg episode reward: [(0, '22.779')] +[2024-07-05 00:39:39,800][45457] Fps is (10 sec: 28672.2, 60 sec: 28672.0, 300 sec: 25978.4). Total num frames: 8523776. Throughput: 0: 7140.4. Samples: 2121302. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:39:39,801][45457] Avg episode reward: [(0, '23.909')] +[2024-07-05 00:39:44,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28672.0, 300 sec: 26089.4). Total num frames: 8667136. Throughput: 0: 7146.1. Samples: 2164416. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:39:44,801][45457] Avg episode reward: [(0, '22.889')] +[2024-07-05 00:39:49,800][45457] Fps is (10 sec: 28672.0, 60 sec: 28603.7, 300 sec: 26200.5). Total num frames: 8810496. Throughput: 0: 7140.8. Samples: 2185976. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:39:49,801][45457] Avg episode reward: [(0, '23.607')] +[2024-07-05 00:39:54,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28603.7, 300 sec: 26325.5). Total num frames: 8953856. Throughput: 0: 7173.8. Samples: 2229164. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:39:54,801][45457] Avg episode reward: [(0, '24.056')] +[2024-07-05 00:39:59,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28603.7, 300 sec: 26450.5). Total num frames: 9097216. Throughput: 0: 7198.3. Samples: 2272306. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:39:59,800][45457] Avg episode reward: [(0, '21.414')] +[2024-07-05 00:40:04,800][45457] Fps is (10 sec: 29081.9, 60 sec: 28672.0, 300 sec: 26575.4). Total num frames: 9244672. Throughput: 0: 7204.9. Samples: 2294252. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:40:04,801][45457] Avg episode reward: [(0, '24.699')] +[2024-07-05 00:40:09,800][45457] Fps is (10 sec: 29491.4, 60 sec: 28808.5, 300 sec: 26700.4). Total num frames: 9392128. Throughput: 0: 7211.4. Samples: 2338096. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:40:09,800][45457] Avg episode reward: [(0, '24.326')] +[2024-07-05 00:40:14,800][45457] Fps is (10 sec: 29491.0, 60 sec: 28945.0, 300 sec: 26839.3). Total num frames: 9539584. Throughput: 0: 7236.0. Samples: 2381938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:40:14,800][45457] Avg episode reward: [(0, '24.015')] +[2024-07-05 00:40:19,800][45457] Fps is (10 sec: 29081.5, 60 sec: 28945.0, 300 sec: 26936.4). Total num frames: 9682944. Throughput: 0: 7244.6. Samples: 2403948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:40:19,801][45457] Avg episode reward: [(0, '24.010')] +[2024-07-05 00:40:24,800][45457] Fps is (10 sec: 29081.8, 60 sec: 28945.1, 300 sec: 27075.3). Total num frames: 9830400. Throughput: 0: 7257.2. Samples: 2447876. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:40:24,800][45457] Avg episode reward: [(0, '24.237')] +[2024-07-05 00:40:29,800][45457] Fps is (10 sec: 29491.3, 60 sec: 29013.4, 300 sec: 27200.2). Total num frames: 9977856. Throughput: 0: 7271.3. Samples: 2491626. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:40:29,800][45457] Avg episode reward: [(0, '25.429')] +[2024-07-05 00:40:34,800][45457] Fps is (10 sec: 29081.4, 60 sec: 29013.3, 300 sec: 27311.4). Total num frames: 10121216. Throughput: 0: 7279.1. Samples: 2513536. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:40:34,800][45457] Avg episode reward: [(0, '23.838')] +[2024-07-05 00:40:39,800][45457] Fps is (10 sec: 29081.5, 60 sec: 29081.6, 300 sec: 27436.3). Total num frames: 10268672. Throughput: 0: 7290.7. Samples: 2557246. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:40:39,800][45457] Avg episode reward: [(0, '20.812')] +[2024-07-05 00:40:44,800][45457] Fps is (10 sec: 29081.5, 60 sec: 29081.6, 300 sec: 27533.4). Total num frames: 10412032. Throughput: 0: 7300.4. Samples: 2600824. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:40:44,801][45457] Avg episode reward: [(0, '24.904')] +[2024-07-05 00:40:49,800][45457] Fps is (10 sec: 29081.7, 60 sec: 29149.9, 300 sec: 27644.6). Total num frames: 10559488. Throughput: 0: 7298.5. Samples: 2622686. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:40:49,801][45457] Avg episode reward: [(0, '23.735')] +[2024-07-05 00:40:54,800][45457] Fps is (10 sec: 28262.5, 60 sec: 29013.3, 300 sec: 27713.9). Total num frames: 10694656. Throughput: 0: 7275.5. Samples: 2665494. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:40:54,801][45457] Avg episode reward: [(0, '24.966')] +[2024-07-05 00:40:59,800][45457] Fps is (10 sec: 27033.4, 60 sec: 28876.8, 300 sec: 27783.4). Total num frames: 10829824. Throughput: 0: 7180.8. Samples: 2705076. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:40:59,801][45457] Avg episode reward: [(0, '26.890')] +[2024-07-05 00:41:04,800][45457] Fps is (10 sec: 27443.2, 60 sec: 28740.2, 300 sec: 27880.6). Total num frames: 10969088. Throughput: 0: 7171.3. Samples: 2726656. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:41:04,801][45457] Avg episode reward: [(0, '23.776')] +[2024-07-05 00:41:09,800][45457] Fps is (10 sec: 27853.0, 60 sec: 28603.7, 300 sec: 27977.8). Total num frames: 11108352. Throughput: 0: 7106.0. Samples: 2767646. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:41:09,801][45457] Avg episode reward: [(0, '24.034')] +[2024-07-05 00:41:14,800][45457] Fps is (10 sec: 27852.8, 60 sec: 28467.2, 300 sec: 28075.0). Total num frames: 11247616. Throughput: 0: 7064.3. Samples: 2809518. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:41:14,801][45457] Avg episode reward: [(0, '23.614')] +[2024-07-05 00:41:19,800][45457] Fps is (10 sec: 27852.5, 60 sec: 28398.9, 300 sec: 28158.3). Total num frames: 11386880. Throughput: 0: 7036.1. Samples: 2830162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:41:19,801][45457] Avg episode reward: [(0, '23.876')] +[2024-07-05 00:41:24,800][45457] Fps is (10 sec: 27852.3, 60 sec: 28262.3, 300 sec: 28241.6). Total num frames: 11526144. Throughput: 0: 6999.5. Samples: 2872226. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:41:24,801][45457] Avg episode reward: [(0, '24.947')] +[2024-07-05 00:41:29,800][45457] Fps is (10 sec: 28262.0, 60 sec: 28194.0, 300 sec: 28352.7). Total num frames: 11669504. Throughput: 0: 6972.5. Samples: 2914588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:41:29,801][45457] Avg episode reward: [(0, '25.547')] +[2024-07-05 00:41:34,800][45457] Fps is (10 sec: 28672.7, 60 sec: 28194.2, 300 sec: 28449.9). Total num frames: 11812864. Throughput: 0: 6970.4. Samples: 2936356. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:41:34,800][45457] Avg episode reward: [(0, '25.152')] +[2024-07-05 00:41:39,800][45457] Fps is (10 sec: 29491.9, 60 sec: 28262.4, 300 sec: 28574.8). Total num frames: 11964416. Throughput: 0: 6999.6. Samples: 2980476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:41:39,800][45457] Avg episode reward: [(0, '28.407')] +[2024-07-05 00:41:44,800][45457] Fps is (10 sec: 29900.7, 60 sec: 28330.7, 300 sec: 28630.4). Total num frames: 12111872. Throughput: 0: 7104.9. Samples: 3024798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:41:44,800][45457] Avg episode reward: [(0, '25.111')] +[2024-07-05 00:41:49,800][45457] Fps is (10 sec: 29491.1, 60 sec: 28330.6, 300 sec: 28644.2). Total num frames: 12259328. Throughput: 0: 7119.4. Samples: 3047028. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:41:49,800][45457] Avg episode reward: [(0, '26.962')] +[2024-07-05 00:41:54,800][45457] Fps is (10 sec: 29081.5, 60 sec: 28467.2, 300 sec: 28644.2). Total num frames: 12402688. Throughput: 0: 7189.5. Samples: 3091172. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:41:54,801][45457] Avg episode reward: [(0, '27.277')] +[2024-07-05 00:41:59,800][45457] Fps is (10 sec: 28672.0, 60 sec: 28603.7, 300 sec: 28644.2). Total num frames: 12546048. Throughput: 0: 7206.2. Samples: 3133798. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:41:59,801][45457] Avg episode reward: [(0, '26.010')] +[2024-07-05 00:42:04,800][45457] Fps is (10 sec: 28262.2, 60 sec: 28603.7, 300 sec: 28644.2). Total num frames: 12685312. Throughput: 0: 7212.3. Samples: 3154716. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:04,801][45457] Avg episode reward: [(0, '27.871')] +[2024-07-05 00:42:09,800][45457] Fps is (10 sec: 28672.0, 60 sec: 28740.2, 300 sec: 28672.0). Total num frames: 12832768. Throughput: 0: 7253.9. Samples: 3198648. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:09,800][45457] Avg episode reward: [(0, '27.815')] +[2024-07-05 00:42:14,800][45457] Fps is (10 sec: 29491.4, 60 sec: 28876.8, 300 sec: 28672.0). Total num frames: 12980224. Throughput: 0: 7291.7. Samples: 3242712. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:14,800][45457] Avg episode reward: [(0, '29.022')] +[2024-07-05 00:42:19,800][45457] Fps is (10 sec: 29491.3, 60 sec: 29013.4, 300 sec: 28685.9). Total num frames: 13127680. Throughput: 0: 7294.4. Samples: 3264602. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:19,800][45457] Avg episode reward: [(0, '28.886')] +[2024-07-05 00:42:24,800][45457] Fps is (10 sec: 29081.8, 60 sec: 29081.7, 300 sec: 28685.9). Total num frames: 13271040. Throughput: 0: 7285.0. Samples: 3308302. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:24,801][45457] Avg episode reward: [(0, '26.917')] +[2024-07-05 00:42:29,800][45457] Fps is (10 sec: 29081.5, 60 sec: 29150.0, 300 sec: 28685.9). Total num frames: 13418496. Throughput: 0: 7274.3. Samples: 3352140. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:42:29,801][45457] Avg episode reward: [(0, '25.300')] +[2024-07-05 00:42:34,800][45457] Fps is (10 sec: 29491.1, 60 sec: 29218.1, 300 sec: 28713.7). Total num frames: 13565952. Throughput: 0: 7265.3. Samples: 3373968. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:34,801][45457] Avg episode reward: [(0, '28.757')] +[2024-07-05 00:42:39,800][45457] Fps is (10 sec: 29081.7, 60 sec: 29081.6, 300 sec: 28713.7). Total num frames: 13709312. Throughput: 0: 7259.7. Samples: 3417856. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:39,800][45457] Avg episode reward: [(0, '28.691')] +[2024-07-05 00:42:44,800][45457] Fps is (10 sec: 29081.5, 60 sec: 29081.6, 300 sec: 28741.4). Total num frames: 13856768. Throughput: 0: 7280.4. Samples: 3461418. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:44,801][45457] Avg episode reward: [(0, '28.524')] +[2024-07-05 00:42:49,800][45457] Fps is (10 sec: 29491.2, 60 sec: 29081.6, 300 sec: 28783.1). Total num frames: 14004224. Throughput: 0: 7303.7. Samples: 3483380. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:42:49,800][45457] Avg episode reward: [(0, '26.348')] +[2024-07-05 00:42:54,800][45457] Fps is (10 sec: 29081.7, 60 sec: 29081.6, 300 sec: 28783.1). Total num frames: 14147584. Throughput: 0: 7304.2. Samples: 3527336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:42:54,800][45457] Avg episode reward: [(0, '27.243')] +[2024-07-05 00:42:59,800][45457] Fps is (10 sec: 29081.4, 60 sec: 29149.9, 300 sec: 28810.8). Total num frames: 14295040. Throughput: 0: 7306.0. Samples: 3571482. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:42:59,801][45457] Avg episode reward: [(0, '26.286')] +[2024-07-05 00:43:04,800][45457] Fps is (10 sec: 29490.8, 60 sec: 29286.4, 300 sec: 28824.7). Total num frames: 14442496. Throughput: 0: 7308.5. Samples: 3593486. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:43:04,800][45457] Avg episode reward: [(0, '29.797')] +[2024-07-05 00:43:09,800][45457] Fps is (10 sec: 29491.1, 60 sec: 29286.4, 300 sec: 28838.6). Total num frames: 14589952. Throughput: 0: 7312.9. Samples: 3637382. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:43:09,800][45457] Avg episode reward: [(0, '30.006')] +[2024-07-05 00:43:14,800][45457] Fps is (10 sec: 29491.6, 60 sec: 29286.4, 300 sec: 28852.5). Total num frames: 14737408. Throughput: 0: 7318.1. Samples: 3681454. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:43:14,800][45457] Avg episode reward: [(0, '28.449')] +[2024-07-05 00:43:19,800][45457] Fps is (10 sec: 29491.4, 60 sec: 29286.4, 300 sec: 28866.4). Total num frames: 14884864. Throughput: 0: 7322.9. Samples: 3703498. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:43:19,800][45457] Avg episode reward: [(0, '26.979')] +[2024-07-05 00:43:24,800][45457] Fps is (10 sec: 29491.4, 60 sec: 29354.7, 300 sec: 28880.3). Total num frames: 15032320. Throughput: 0: 7325.3. Samples: 3747496. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:43:24,800][45457] Avg episode reward: [(0, '26.532')] +[2024-07-05 00:43:29,800][45457] Fps is (10 sec: 29081.7, 60 sec: 29286.4, 300 sec: 28866.4). Total num frames: 15175680. Throughput: 0: 7337.3. Samples: 3791596. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:43:29,800][45457] Avg episode reward: [(0, '27.630')] +[2024-07-05 00:43:34,800][45457] Fps is (10 sec: 29081.5, 60 sec: 29286.4, 300 sec: 28880.3). Total num frames: 15323136. Throughput: 0: 7344.1. Samples: 3813866. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:43:34,800][45457] Avg episode reward: [(0, '30.526')] +[2024-07-05 00:43:39,800][45457] Fps is (10 sec: 27852.6, 60 sec: 29081.6, 300 sec: 28838.6). Total num frames: 15454208. Throughput: 0: 7273.0. Samples: 3854622. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 00:43:39,800][45457] Avg episode reward: [(0, '28.501')] +[2024-07-05 00:43:44,800][45457] Fps is (10 sec: 27033.5, 60 sec: 28945.1, 300 sec: 28810.8). Total num frames: 15593472. Throughput: 0: 7204.0. Samples: 3895660. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 00:43:44,800][45457] Avg episode reward: [(0, '26.379')] +[2024-07-05 00:43:49,800][45457] Fps is (10 sec: 27852.9, 60 sec: 28808.5, 300 sec: 28797.0). Total num frames: 15732736. Throughput: 0: 7178.3. Samples: 3916510. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:43:49,801][45457] Avg episode reward: [(0, '26.720')] +[2024-07-05 00:43:54,800][45457] Fps is (10 sec: 28262.3, 60 sec: 28808.5, 300 sec: 28797.0). Total num frames: 15876096. Throughput: 0: 7154.8. Samples: 3959346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:43:54,800][45457] Avg episode reward: [(0, '27.638')] +[2024-07-05 00:43:59,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28740.3, 300 sec: 28797.0). Total num frames: 16019456. Throughput: 0: 7134.0. Samples: 4002484. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 00:43:59,801][45457] Avg episode reward: [(0, '31.449')] +[2024-07-05 00:44:04,800][45457] Fps is (10 sec: 29081.8, 60 sec: 28740.4, 300 sec: 28824.7). Total num frames: 16166912. Throughput: 0: 7139.6. Samples: 4024780. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:44:04,800][45457] Avg episode reward: [(0, '34.074')] +[2024-07-05 00:44:09,800][45457] Fps is (10 sec: 29081.8, 60 sec: 28672.1, 300 sec: 28838.6). Total num frames: 16310272. Throughput: 0: 7123.0. Samples: 4068032. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:44:09,800][45457] Avg episode reward: [(0, '28.801')] +[2024-07-05 00:44:14,800][45457] Fps is (10 sec: 29081.5, 60 sec: 28672.0, 300 sec: 28852.5). Total num frames: 16457728. Throughput: 0: 7108.7. Samples: 4111488. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:44:14,800][45457] Avg episode reward: [(0, '29.606')] +[2024-07-05 00:44:19,800][45457] Fps is (10 sec: 29081.4, 60 sec: 28603.7, 300 sec: 28838.6). Total num frames: 16601088. Throughput: 0: 7099.3. Samples: 4133334. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:44:19,800][45457] Avg episode reward: [(0, '33.475')] +[2024-07-05 00:44:24,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28535.4, 300 sec: 28838.6). Total num frames: 16744448. Throughput: 0: 7163.8. Samples: 4176994. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:44:24,801][45457] Avg episode reward: [(0, '29.985')] +[2024-07-05 00:44:29,800][45457] Fps is (10 sec: 29081.7, 60 sec: 28603.7, 300 sec: 28852.5). Total num frames: 16891904. Throughput: 0: 7222.4. Samples: 4220670. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:29,800][45457] Avg episode reward: [(0, '30.740')] +[2024-07-05 00:44:34,800][45457] Fps is (10 sec: 29081.6, 60 sec: 28535.4, 300 sec: 28852.5). Total num frames: 17035264. Throughput: 0: 7241.3. Samples: 4242370. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:34,801][45457] Avg episode reward: [(0, '27.860')] +[2024-07-05 00:44:39,800][45457] Fps is (10 sec: 28671.7, 60 sec: 28740.2, 300 sec: 28852.5). Total num frames: 17178624. Throughput: 0: 7243.6. Samples: 4285310. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:39,801][45457] Avg episode reward: [(0, '31.510')] +[2024-07-05 00:44:44,800][45457] Fps is (10 sec: 29081.6, 60 sec: 28876.8, 300 sec: 28866.4). Total num frames: 17326080. Throughput: 0: 7252.7. Samples: 4328858. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:44,800][45457] Avg episode reward: [(0, '33.207')] +[2024-07-05 00:44:49,800][45457] Fps is (10 sec: 29081.9, 60 sec: 28945.1, 300 sec: 28866.4). Total num frames: 17469440. Throughput: 0: 7240.0. Samples: 4350580. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:49,800][45457] Avg episode reward: [(0, '28.141')] +[2024-07-05 00:44:54,800][45457] Fps is (10 sec: 29081.6, 60 sec: 29013.3, 300 sec: 28880.3). Total num frames: 17616896. Throughput: 0: 7246.5. Samples: 4394124. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:54,800][45457] Avg episode reward: [(0, '28.240')] +[2024-07-05 00:44:59,800][45457] Fps is (10 sec: 29081.4, 60 sec: 29013.3, 300 sec: 28866.4). Total num frames: 17760256. Throughput: 0: 7253.3. Samples: 4437888. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:44:59,800][45457] Avg episode reward: [(0, '26.141')] +[2024-07-05 00:45:04,800][45457] Fps is (10 sec: 29081.8, 60 sec: 29013.3, 300 sec: 28866.4). Total num frames: 17907712. Throughput: 0: 7259.3. Samples: 4460004. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:04,800][45457] Avg episode reward: [(0, '27.048')] +[2024-07-05 00:45:09,800][45457] Fps is (10 sec: 29491.2, 60 sec: 29081.6, 300 sec: 28866.4). Total num frames: 18055168. Throughput: 0: 7268.8. Samples: 4504092. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:09,800][45457] Avg episode reward: [(0, '32.117')] +[2024-07-05 00:45:14,800][45457] Fps is (10 sec: 29491.1, 60 sec: 29081.6, 300 sec: 28880.3). Total num frames: 18202624. Throughput: 0: 7269.5. Samples: 4547796. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:14,800][45457] Avg episode reward: [(0, '31.444')] +[2024-07-05 00:45:19,800][45457] Fps is (10 sec: 29081.7, 60 sec: 29081.6, 300 sec: 28866.4). Total num frames: 18345984. Throughput: 0: 7271.6. Samples: 4569592. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:19,800][45457] Avg episode reward: [(0, '31.100')] +[2024-07-05 00:45:24,800][45457] Fps is (10 sec: 28672.0, 60 sec: 29081.6, 300 sec: 28852.5). Total num frames: 18489344. Throughput: 0: 7277.7. Samples: 4612804. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:45:24,800][45457] Avg episode reward: [(0, '29.468')] +[2024-07-05 00:45:29,800][45457] Fps is (10 sec: 28671.9, 60 sec: 29013.3, 300 sec: 28852.5). Total num frames: 18632704. Throughput: 0: 7267.1. Samples: 4655876. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:29,800][45457] Avg episode reward: [(0, '31.971')] +[2024-07-05 00:45:34,800][45457] Fps is (10 sec: 28671.8, 60 sec: 29013.3, 300 sec: 28838.6). Total num frames: 18776064. Throughput: 0: 7264.3. Samples: 4677472. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:34,800][45457] Avg episode reward: [(0, '31.432')] +[2024-07-05 00:45:39,800][45457] Fps is (10 sec: 29081.8, 60 sec: 29081.7, 300 sec: 28852.5). Total num frames: 18923520. Throughput: 0: 7264.2. Samples: 4721014. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:39,800][45457] Avg episode reward: [(0, '29.573')] +[2024-07-05 00:45:44,800][45457] Fps is (10 sec: 29081.5, 60 sec: 29013.3, 300 sec: 28838.6). Total num frames: 19066880. Throughput: 0: 7254.3. Samples: 4764330. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:44,801][45457] Avg episode reward: [(0, '31.777')] +[2024-07-05 00:45:49,800][45457] Fps is (10 sec: 29081.4, 60 sec: 29081.6, 300 sec: 28880.3). Total num frames: 19214336. Throughput: 0: 7248.6. Samples: 4786190. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:49,801][45457] Avg episode reward: [(0, '32.344')] +[2024-07-05 00:45:54,800][45457] Fps is (10 sec: 29081.7, 60 sec: 29013.3, 300 sec: 28908.0). Total num frames: 19357696. Throughput: 0: 7240.8. Samples: 4829928. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:45:54,800][45457] Avg episode reward: [(0, '32.536')] +[2024-07-05 00:45:59,800][45457] Fps is (10 sec: 29081.3, 60 sec: 29081.6, 300 sec: 28935.8). Total num frames: 19505152. Throughput: 0: 7232.0. Samples: 4873236. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:45:59,801][45457] Avg episode reward: [(0, '31.148')] +[2024-07-05 00:46:04,800][45457] Fps is (10 sec: 29081.8, 60 sec: 29013.3, 300 sec: 28949.7). Total num frames: 19648512. Throughput: 0: 7231.4. Samples: 4895004. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:46:04,801][45457] Avg episode reward: [(0, '31.797')] +[2024-07-05 00:46:09,800][45457] Fps is (10 sec: 28672.3, 60 sec: 28945.1, 300 sec: 28963.6). Total num frames: 19791872. Throughput: 0: 7236.4. Samples: 4938444. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 00:46:09,800][45457] Avg episode reward: [(0, '32.176')] +[2024-07-05 00:46:14,800][45457] Fps is (10 sec: 28671.9, 60 sec: 28876.8, 300 sec: 28977.5). Total num frames: 19935232. Throughput: 0: 7242.5. Samples: 4981788. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 00:46:14,800][45457] Avg episode reward: [(0, '34.624')] +[2024-07-05 00:46:17,083][45457] Component Batcher_0 stopped! +[2024-07-05 00:46:17,095][45457] Component RolloutWorker_w7 stopped! +[2024-07-05 00:46:17,096][45457] Component RolloutWorker_w2 stopped! +[2024-07-05 00:46:17,097][45457] Component RolloutWorker_w6 stopped! +[2024-07-05 00:46:17,097][45457] Component RolloutWorker_w5 stopped! +[2024-07-05 00:46:17,098][45457] Component RolloutWorker_w1 stopped! +[2024-07-05 00:46:17,099][45457] Component RolloutWorker_w4 stopped! +[2024-07-05 00:46:17,099][45457] Component RolloutWorker_w3 stopped! +[2024-07-05 00:46:17,100][45457] Component RolloutWorker_w0 stopped! +[2024-07-05 00:46:17,116][45457] Component InferenceWorker_p0-w0 stopped! +[2024-07-05 00:46:17,250][45457] Component LearnerWorker_p0 stopped! +[2024-07-05 00:46:17,251][45457] Waiting for process learner_proc0 to stop... +[2024-07-05 00:46:18,226][45457] Waiting for process inference_proc0-0 to join... +[2024-07-05 00:46:18,227][45457] Waiting for process rollout_proc0 to join... +[2024-07-05 00:46:18,227][45457] Waiting for process rollout_proc1 to join... +[2024-07-05 00:46:18,228][45457] Waiting for process rollout_proc2 to join... +[2024-07-05 00:46:18,228][45457] Waiting for process rollout_proc3 to join... +[2024-07-05 00:46:18,228][45457] Waiting for process rollout_proc4 to join... +[2024-07-05 00:46:18,229][45457] Waiting for process rollout_proc5 to join... +[2024-07-05 00:46:18,229][45457] Waiting for process rollout_proc6 to join... +[2024-07-05 00:46:18,230][45457] Waiting for process rollout_proc7 to join... +[2024-07-05 00:46:18,230][45457] Batcher 0 profile tree view: +batching: 26.9927, releasing_batches: 0.1167 +[2024-07-05 00:46:18,231][45457] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 8.8195 +update_model: 10.7823 + weight_update: 0.0007 +one_step: 0.0022 + handle_policy_step: 681.7734 + deserialize: 26.6959, stack: 3.6982, obs_to_device_normalize: 149.7028, forward: 372.6342, send_messages: 32.3696 + prepare_outputs: 72.1520 + to_cpu: 43.3974 +[2024-07-05 00:46:18,231][45457] Learner 0 profile tree view: +misc: 0.0172, prepare_batch: 44.0229 +train: 124.9755 + epoch_init: 0.0157, minibatch_init: 0.0230, losses_postprocess: 0.6276, kl_divergence: 0.6599, after_optimizer: 50.0722 + calculate_losses: 55.1219 + losses_init: 0.0092, forward_head: 2.0814, bptt_initial: 42.7142, tail: 2.0637, advantages_returns: 0.4992, losses: 2.3709 + bptt: 4.7810 + bptt_forward_core: 4.5881 + update: 17.1506 + clip: 2.1479 +[2024-07-05 00:46:18,231][45457] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3953, enqueue_policy_requests: 22.8109, env_step: 280.5938, overhead: 25.4768, complete_rollouts: 0.6825 +save_policy_outputs: 21.3364 + split_output_tensors: 10.1050 +[2024-07-05 00:46:18,231][45457] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3921, enqueue_policy_requests: 23.0384, env_step: 308.7818, overhead: 25.1818, complete_rollouts: 0.6843 +save_policy_outputs: 21.6358 + split_output_tensors: 10.4110 +[2024-07-05 00:46:18,232][45457] Loop Runner_EvtLoop terminating... +[2024-07-05 00:46:18,232][45457] Runner profile tree view: +main_loop: 740.1108 +[2024-07-05 00:46:18,232][45457] Collected {0: 20004864}, FPS: 27029.6 +[2024-07-05 00:47:20,823][45457] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json +[2024-07-05 00:47:20,823][45457] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 00:47:20,824][45457] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 00:47:20,824][45457] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 00:47:20,824][45457] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:47:20,824][45457] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 00:47:20,824][45457] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 00:47:20,825][45457] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 00:47:20,825][45457] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-07-05 00:47:20,825][45457] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-07-05 00:47:20,825][45457] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 00:47:20,826][45457] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 00:47:20,826][45457] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 00:47:20,826][45457] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 00:47:20,827][45457] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 00:47:20,842][45457] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 00:47:20,843][45457] RunningMeanStd input shape: (1,) +[2024-07-05 00:47:20,849][45457] ConvEncoder: input_channels=3 +[2024-07-05 00:47:20,880][45457] Conv encoder output size: 512 +[2024-07-05 00:47:20,881][45457] Policy head output size: 512 +[2024-07-05 00:47:20,897][45457] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth... +[2024-07-05 00:47:21,524][45457] Num frames 100... +[2024-07-05 00:47:21,583][45457] Num frames 200... +[2024-07-05 00:47:21,641][45457] Num frames 300... +[2024-07-05 00:47:21,702][45457] Num frames 400... +[2024-07-05 00:47:21,761][45457] Num frames 500... +[2024-07-05 00:47:21,819][45457] Num frames 600... +[2024-07-05 00:47:21,878][45457] Num frames 700... +[2024-07-05 00:47:21,937][45457] Num frames 800... +[2024-07-05 00:47:21,995][45457] Num frames 900... +[2024-07-05 00:47:22,061][45457] Num frames 1000... +[2024-07-05 00:47:22,124][45457] Num frames 1100... +[2024-07-05 00:47:22,189][45457] Num frames 1200... +[2024-07-05 00:47:22,256][45457] Num frames 1300... +[2024-07-05 00:47:22,320][45457] Num frames 1400... +[2024-07-05 00:47:22,384][45457] Num frames 1500... +[2024-07-05 00:47:22,450][45457] Num frames 1600... +[2024-07-05 00:47:22,545][45457] Avg episode rewards: #0: 40.640, true rewards: #0: 16.640 +[2024-07-05 00:47:22,546][45457] Avg episode reward: 40.640, avg true_objective: 16.640 +[2024-07-05 00:47:22,574][45457] Num frames 1700... +[2024-07-05 00:47:22,636][45457] Num frames 1800... +[2024-07-05 00:47:22,699][45457] Num frames 1900... +[2024-07-05 00:47:22,761][45457] Num frames 2000... +[2024-07-05 00:47:22,823][45457] Num frames 2100... +[2024-07-05 00:47:22,885][45457] Num frames 2200... +[2024-07-05 00:47:22,944][45457] Num frames 2300... +[2024-07-05 00:47:23,039][45457] Avg episode rewards: #0: 27.840, true rewards: #0: 11.840 +[2024-07-05 00:47:23,041][45457] Avg episode reward: 27.840, avg true_objective: 11.840 +[2024-07-05 00:47:23,066][45457] Num frames 2400... +[2024-07-05 00:47:23,128][45457] Num frames 2500... +[2024-07-05 00:47:23,190][45457] Num frames 2600... +[2024-07-05 00:47:23,248][45457] Num frames 2700... +[2024-07-05 00:47:23,308][45457] Num frames 2800... +[2024-07-05 00:47:23,366][45457] Num frames 2900... +[2024-07-05 00:47:23,423][45457] Num frames 3000... +[2024-07-05 00:47:23,482][45457] Num frames 3100... +[2024-07-05 00:47:23,542][45457] Num frames 3200... +[2024-07-05 00:47:23,605][45457] Num frames 3300... +[2024-07-05 00:47:23,668][45457] Num frames 3400... +[2024-07-05 00:47:23,732][45457] Num frames 3500... +[2024-07-05 00:47:23,802][45457] Num frames 3600... +[2024-07-05 00:47:23,869][45457] Num frames 3700... +[2024-07-05 00:47:23,936][45457] Num frames 3800... +[2024-07-05 00:47:24,009][45457] Num frames 3900... +[2024-07-05 00:47:24,087][45457] Num frames 4000... +[2024-07-05 00:47:24,161][45457] Num frames 4100... +[2024-07-05 00:47:24,233][45457] Avg episode rewards: #0: 33.093, true rewards: #0: 13.760 +[2024-07-05 00:47:24,234][45457] Avg episode reward: 33.093, avg true_objective: 13.760 +[2024-07-05 00:47:24,295][45457] Num frames 4200... +[2024-07-05 00:47:24,355][45457] Num frames 4300... +[2024-07-05 00:47:24,415][45457] Num frames 4400... +[2024-07-05 00:47:24,478][45457] Num frames 4500... +[2024-07-05 00:47:24,539][45457] Num frames 4600... +[2024-07-05 00:47:24,599][45457] Num frames 4700... +[2024-07-05 00:47:24,659][45457] Num frames 4800... +[2024-07-05 00:47:24,719][45457] Num frames 4900... +[2024-07-05 00:47:24,779][45457] Num frames 5000... +[2024-07-05 00:47:24,838][45457] Num frames 5100... +[2024-07-05 00:47:24,898][45457] Num frames 5200... +[2024-07-05 00:47:24,957][45457] Num frames 5300... +[2024-07-05 00:47:25,018][45457] Num frames 5400... +[2024-07-05 00:47:25,077][45457] Num frames 5500... +[2024-07-05 00:47:25,137][45457] Num frames 5600... +[2024-07-05 00:47:25,197][45457] Num frames 5700... +[2024-07-05 00:47:25,256][45457] Num frames 5800... +[2024-07-05 00:47:25,360][45457] Avg episode rewards: #0: 37.470, true rewards: #0: 14.720 +[2024-07-05 00:47:25,361][45457] Avg episode reward: 37.470, avg true_objective: 14.720 +[2024-07-05 00:47:25,373][45457] Num frames 5900... +[2024-07-05 00:47:25,433][45457] Num frames 6000... +[2024-07-05 00:47:25,492][45457] Num frames 6100... +[2024-07-05 00:47:25,552][45457] Num frames 6200... +[2024-07-05 00:47:25,613][45457] Num frames 6300... +[2024-07-05 00:47:25,672][45457] Num frames 6400... +[2024-07-05 00:47:25,731][45457] Num frames 6500... +[2024-07-05 00:47:25,791][45457] Num frames 6600... +[2024-07-05 00:47:25,851][45457] Num frames 6700... +[2024-07-05 00:47:25,909][45457] Num frames 6800... +[2024-07-05 00:47:25,968][45457] Num frames 6900... +[2024-07-05 00:47:26,026][45457] Num frames 7000... +[2024-07-05 00:47:26,085][45457] Num frames 7100... +[2024-07-05 00:47:26,141][45457] Avg episode rewards: #0: 34.808, true rewards: #0: 14.208 +[2024-07-05 00:47:26,143][45457] Avg episode reward: 34.808, avg true_objective: 14.208 +[2024-07-05 00:47:26,206][45457] Num frames 7200... +[2024-07-05 00:47:26,264][45457] Num frames 7300... +[2024-07-05 00:47:26,323][45457] Num frames 7400... +[2024-07-05 00:47:26,383][45457] Num frames 7500... +[2024-07-05 00:47:26,444][45457] Num frames 7600... +[2024-07-05 00:47:26,505][45457] Num frames 7700... +[2024-07-05 00:47:26,569][45457] Num frames 7800... +[2024-07-05 00:47:26,627][45457] Num frames 7900... +[2024-07-05 00:47:26,687][45457] Num frames 8000... +[2024-07-05 00:47:26,749][45457] Num frames 8100... +[2024-07-05 00:47:26,814][45457] Num frames 8200... +[2024-07-05 00:47:26,875][45457] Num frames 8300... +[2024-07-05 00:47:26,935][45457] Num frames 8400... +[2024-07-05 00:47:26,992][45457] Num frames 8500... +[2024-07-05 00:47:27,054][45457] Num frames 8600... +[2024-07-05 00:47:27,112][45457] Avg episode rewards: #0: 35.513, true rewards: #0: 14.347 +[2024-07-05 00:47:27,114][45457] Avg episode reward: 35.513, avg true_objective: 14.347 +[2024-07-05 00:47:27,174][45457] Num frames 8700... +[2024-07-05 00:47:27,232][45457] Num frames 8800... +[2024-07-05 00:47:27,290][45457] Num frames 8900... +[2024-07-05 00:47:27,362][45457] Num frames 9000... +[2024-07-05 00:47:27,423][45457] Num frames 9100... +[2024-07-05 00:47:27,484][45457] Num frames 9200... +[2024-07-05 00:47:27,544][45457] Num frames 9300... +[2024-07-05 00:47:27,604][45457] Num frames 9400... +[2024-07-05 00:47:27,665][45457] Num frames 9500... +[2024-07-05 00:47:27,727][45457] Num frames 9600... +[2024-07-05 00:47:27,790][45457] Avg episode rewards: #0: 33.734, true rewards: #0: 13.734 +[2024-07-05 00:47:27,792][45457] Avg episode reward: 33.734, avg true_objective: 13.734 +[2024-07-05 00:47:27,851][45457] Num frames 9700... +[2024-07-05 00:47:27,909][45457] Num frames 9800... +[2024-07-05 00:47:27,967][45457] Num frames 9900... +[2024-07-05 00:47:28,025][45457] Num frames 10000... +[2024-07-05 00:47:28,086][45457] Num frames 10100... +[2024-07-05 00:47:28,146][45457] Num frames 10200... +[2024-07-05 00:47:28,208][45457] Num frames 10300... +[2024-07-05 00:47:28,268][45457] Num frames 10400... +[2024-07-05 00:47:28,327][45457] Num frames 10500... +[2024-07-05 00:47:28,388][45457] Num frames 10600... +[2024-07-05 00:47:28,446][45457] Avg episode rewards: #0: 32.132, true rewards: #0: 13.257 +[2024-07-05 00:47:28,447][45457] Avg episode reward: 32.132, avg true_objective: 13.257 +[2024-07-05 00:47:28,507][45457] Num frames 10700... +[2024-07-05 00:47:28,567][45457] Num frames 10800... +[2024-07-05 00:47:28,627][45457] Num frames 10900... +[2024-07-05 00:47:28,688][45457] Num frames 11000... +[2024-07-05 00:47:28,750][45457] Num frames 11100... +[2024-07-05 00:47:28,810][45457] Num frames 11200... +[2024-07-05 00:47:28,870][45457] Num frames 11300... +[2024-07-05 00:47:28,928][45457] Num frames 11400... +[2024-07-05 00:47:29,024][45457] Avg episode rewards: #0: 30.744, true rewards: #0: 12.744 +[2024-07-05 00:47:29,025][45457] Avg episode reward: 30.744, avg true_objective: 12.744 +[2024-07-05 00:47:29,049][45457] Num frames 11500... +[2024-07-05 00:47:29,109][45457] Num frames 11600... +[2024-07-05 00:47:29,165][45457] Num frames 11700... +[2024-07-05 00:47:29,225][45457] Num frames 11800... +[2024-07-05 00:47:29,285][45457] Num frames 11900... +[2024-07-05 00:47:29,363][45457] Avg episode rewards: #0: 28.639, true rewards: #0: 11.939 +[2024-07-05 00:47:29,364][45457] Avg episode reward: 28.639, avg true_objective: 11.939 +[2024-07-05 00:47:41,804][45457] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! +[2024-07-05 10:23:44,255][11302] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json... +[2024-07-05 10:23:44,257][11302] Rollout worker 0 uses device cpu +[2024-07-05 10:23:44,258][11302] Rollout worker 1 uses device cpu +[2024-07-05 10:23:44,258][11302] Rollout worker 2 uses device cpu +[2024-07-05 10:23:44,258][11302] Rollout worker 3 uses device cpu +[2024-07-05 10:23:44,259][11302] Rollout worker 4 uses device cpu +[2024-07-05 10:23:44,259][11302] Rollout worker 5 uses device cpu +[2024-07-05 10:23:44,259][11302] Rollout worker 6 uses device cpu +[2024-07-05 10:23:44,260][11302] Rollout worker 7 uses device cpu +[2024-07-05 10:23:44,260][11302] Rollout worker 8 uses device cpu +[2024-07-05 10:23:44,260][11302] Rollout worker 9 uses device cpu +[2024-07-05 10:23:44,260][11302] Rollout worker 10 uses device cpu +[2024-07-05 10:23:44,261][11302] Rollout worker 11 uses device cpu +[2024-07-05 10:23:44,261][11302] Rollout worker 12 uses device cpu +[2024-07-05 10:23:44,261][11302] Rollout worker 13 uses device cpu +[2024-07-05 10:23:44,262][11302] Rollout worker 14 uses device cpu +[2024-07-05 10:23:44,262][11302] Rollout worker 15 uses device cpu +[2024-07-05 10:23:44,370][11302] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:23:44,371][11302] InferenceWorker_p0-w0: min num requests: 5 +[2024-07-05 10:23:44,460][11302] Starting all processes... +[2024-07-05 10:23:44,461][11302] Starting process learner_proc0 +[2024-07-05 10:23:45,124][11302] Starting all processes... +[2024-07-05 10:23:45,131][11302] Starting process inference_proc0-0 +[2024-07-05 10:23:45,132][11302] Starting process rollout_proc0 +[2024-07-05 10:23:45,132][11302] Starting process rollout_proc1 +[2024-07-05 10:23:45,133][11302] Starting process rollout_proc2 +[2024-07-05 10:23:45,135][11302] Starting process rollout_proc3 +[2024-07-05 10:23:45,136][11302] Starting process rollout_proc4 +[2024-07-05 10:23:45,140][11302] Starting process rollout_proc5 +[2024-07-05 10:23:45,142][11302] Starting process rollout_proc6 +[2024-07-05 10:23:45,142][11302] Starting process rollout_proc7 +[2024-07-05 10:23:45,142][11302] Starting process rollout_proc8 +[2024-07-05 10:23:45,143][11302] Starting process rollout_proc9 +[2024-07-05 10:23:45,144][11302] Starting process rollout_proc10 +[2024-07-05 10:23:45,145][11302] Starting process rollout_proc11 +[2024-07-05 10:23:45,148][11302] Starting process rollout_proc12 +[2024-07-05 10:23:45,157][11302] Starting process rollout_proc13 +[2024-07-05 10:23:45,157][11302] Starting process rollout_proc14 +[2024-07-05 10:23:45,171][11302] Starting process rollout_proc15 +[2024-07-05 10:23:49,312][11874] Worker 7 uses CPU cores [7] +[2024-07-05 10:23:49,324][11867] Worker 0 uses CPU cores [0] +[2024-07-05 10:23:49,436][11875] Worker 8 uses CPU cores [8] +[2024-07-05 10:23:49,460][11869] Worker 2 uses CPU cores [2] +[2024-07-05 10:23:49,665][11878] Worker 11 uses CPU cores [11] +[2024-07-05 10:23:49,698][11846] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:23:49,699][11846] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-07-05 10:23:49,725][11894] Worker 12 uses CPU cores [12] +[2024-07-05 10:23:49,735][11871] Worker 4 uses CPU cores [4] +[2024-07-05 10:23:49,736][11876] Worker 10 uses CPU cores [10] +[2024-07-05 10:23:49,753][11870] Worker 3 uses CPU cores [3] +[2024-07-05 10:23:49,772][11897] Worker 15 uses CPU cores [15] +[2024-07-05 10:23:49,782][11895] Worker 13 uses CPU cores [13] +[2024-07-05 10:23:49,783][11846] Num visible devices: 1 +[2024-07-05 10:23:49,823][11877] Worker 9 uses CPU cores [9] +[2024-07-05 10:23:49,832][11846] Setting fixed seed 200 +[2024-07-05 10:23:49,846][11846] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:23:49,846][11846] Initializing actor-critic model on device cuda:0 +[2024-07-05 10:23:49,846][11846] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:23:49,847][11846] RunningMeanStd input shape: (1,) +[2024-07-05 10:23:49,852][11896] Worker 14 uses CPU cores [14] +[2024-07-05 10:23:49,862][11846] Num input channels: 3 +[2024-07-05 10:23:49,867][11872] Worker 5 uses CPU cores [5] +[2024-07-05 10:23:49,902][11868] Worker 1 uses CPU cores [1] +[2024-07-05 10:23:49,906][11846] Convolutional layer output size: 4608 +[2024-07-05 10:23:49,926][11846] Policy head output size: 512 +[2024-07-05 10:23:50,058][11873] Worker 6 uses CPU cores [6] +[2024-07-05 10:23:50,058][11846] Created Actor Critic model with architecture: +[2024-07-05 10:23:50,058][11846] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -7907,23 +1844,67 @@ git_repo_name=not a git repository ) (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (basic_encoder): ResnetEncoder( + (conv_head): Sequential( + (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (2): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (3): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (6): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) + (7): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (10): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (11): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (12): ELU(alpha=1.0) + ) + (mlp_layers): Sequential( + (0): Linear(in_features=4608, out_features=512, bias=True) + (1): ELU(alpha=1.0) ) ) ) @@ -7938,423 +1919,354 @@ git_repo_name=not a git repository (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-07-05 12:14:49,612][51898] Worker 7 uses CPU cores [7] -[2024-07-05 12:14:49,688][51853] Using optimizer -[2024-07-05 12:14:49,724][51879] Worker 5 uses CPU cores [5] -[2024-07-05 12:14:49,756][51902] Worker 13 uses CPU cores [13] -[2024-07-05 12:14:49,776][51895] Worker 9 uses CPU cores [9] -[2024-07-05 12:14:49,899][51900] Worker 12 uses CPU cores [12] -[2024-07-05 12:14:50,045][51901] Worker 11 uses CPU cores [11] -[2024-07-05 12:14:50,365][51853] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020754_150011904.pth... -[2024-07-05 12:14:50,399][51853] Loading model from checkpoint -[2024-07-05 12:14:50,401][51853] Loaded experiment state at self.train_step=20754, self.env_steps=150011904 -[2024-07-05 12:14:50,401][51853] Initialized policy 0 weights for model version 20754 -[2024-07-05 12:14:50,402][51853] LearnerWorker_p0 finished initialization! -[2024-07-05 12:14:50,402][51853] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:14:50,491][51873] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:14:50,492][51873] RunningMeanStd input shape: (1,) -[2024-07-05 12:14:50,499][51873] ConvEncoder: input_channels=3 -[2024-07-05 12:14:50,553][51873] Conv encoder output size: 512 -[2024-07-05 12:14:50,553][51873] Policy head output size: 512 -[2024-07-05 12:14:50,589][25826] Inference worker 0-0 is ready! -[2024-07-05 12:14:50,590][25826] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 12:14:50,665][51901] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,667][51874] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,667][51876] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,668][51878] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,668][51903] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,668][51875] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,670][51897] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,670][51877] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,672][51904] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,677][51895] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,677][51898] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,686][51899] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,685][51879] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,686][51902] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,688][51896] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,696][51900] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:14:50,867][25826] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 150011904. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 12:14:51,300][51878] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,300][51877] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,301][51898] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,302][51903] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,304][51876] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,304][51874] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,484][51901] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,489][51876] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,511][51879] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,518][51895] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,546][51903] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,573][51904] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,655][51901] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,660][51900] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,672][51874] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,734][51895] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,797][51897] Decorrelating experience for 0 frames... -[2024-07-05 12:14:51,837][51900] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,845][51904] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,848][51879] Decorrelating experience for 32 frames... -[2024-07-05 12:14:51,854][51874] Decorrelating experience for 64 frames... -[2024-07-05 12:14:51,912][51896] Decorrelating experience for 0 frames... -[2024-07-05 12:14:52,022][51900] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,022][51878] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,030][51877] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,038][51903] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,088][51897] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,092][51895] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,152][51879] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,225][51899] Decorrelating experience for 0 frames... -[2024-07-05 12:14:52,239][51896] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,249][51875] Decorrelating experience for 0 frames... -[2024-07-05 12:14:52,263][51878] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,282][51876] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,283][51897] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,345][51903] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,433][51875] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,497][51897] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,500][51876] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,509][51896] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,522][51898] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,527][51899] Decorrelating experience for 32 frames... -[2024-07-05 12:14:52,548][51900] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,553][51904] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,754][51901] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,755][51895] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,769][51903] Decorrelating experience for 128 frames... -[2024-07-05 12:14:52,813][51902] Decorrelating experience for 0 frames... -[2024-07-05 12:14:52,826][51874] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,833][51898] Decorrelating experience for 64 frames... -[2024-07-05 12:14:52,835][51896] Decorrelating experience for 96 frames... -[2024-07-05 12:14:52,935][51897] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,052][51879] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,055][51899] Decorrelating experience for 64 frames... -[2024-07-05 12:14:53,064][51876] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,071][51904] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,074][51900] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,119][51903] Decorrelating experience for 160 frames... -[2024-07-05 12:14:53,166][51896] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,236][51874] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,276][51895] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,291][51902] Decorrelating experience for 32 frames... -[2024-07-05 12:14:53,293][51901] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,313][51875] Decorrelating experience for 64 frames... -[2024-07-05 12:14:53,495][51878] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,516][51896] Decorrelating experience for 160 frames... -[2024-07-05 12:14:53,516][51904] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,520][51899] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,545][51875] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,571][51900] Decorrelating experience for 160 frames... -[2024-07-05 12:14:53,577][51902] Decorrelating experience for 64 frames... -[2024-07-05 12:14:53,757][51898] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,759][51874] Decorrelating experience for 160 frames... -[2024-07-05 12:14:53,786][51895] Decorrelating experience for 160 frames... -[2024-07-05 12:14:53,787][51877] Decorrelating experience for 64 frames... -[2024-07-05 12:14:53,789][51901] Decorrelating experience for 128 frames... -[2024-07-05 12:14:53,832][51904] Decorrelating experience for 160 frames... -[2024-07-05 12:14:53,864][51902] Decorrelating experience for 96 frames... -[2024-07-05 12:14:53,945][51875] Decorrelating experience for 128 frames... -[2024-07-05 12:14:54,019][51879] Decorrelating experience for 128 frames... -[2024-07-05 12:14:54,035][51900] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,038][51901] Decorrelating experience for 160 frames... -[2024-07-05 12:14:54,073][51897] Decorrelating experience for 160 frames... -[2024-07-05 12:14:54,134][51895] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,162][51898] Decorrelating experience for 128 frames... -[2024-07-05 12:14:54,278][51875] Decorrelating experience for 160 frames... -[2024-07-05 12:14:54,282][51876] Decorrelating experience for 160 frames... -[2024-07-05 12:14:54,359][51899] Decorrelating experience for 128 frames... -[2024-07-05 12:14:54,385][51897] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,387][51904] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,389][51901] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,461][51898] Decorrelating experience for 160 frames... -[2024-07-05 12:14:54,462][51874] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,575][51877] Decorrelating experience for 96 frames... -[2024-07-05 12:14:54,622][51896] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,630][51895] Decorrelating experience for 224 frames... -[2024-07-05 12:14:54,655][51875] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,679][51876] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,720][51900] Decorrelating experience for 224 frames... -[2024-07-05 12:14:54,729][51899] Decorrelating experience for 160 frames... -[2024-07-05 12:14:54,805][51901] Decorrelating experience for 224 frames... -[2024-07-05 12:14:54,867][51903] Decorrelating experience for 192 frames... -[2024-07-05 12:14:54,882][51902] Decorrelating experience for 128 frames... -[2024-07-05 12:14:54,977][51874] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,041][51898] Decorrelating experience for 192 frames... -[2024-07-05 12:14:55,065][51896] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,104][51897] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,152][51904] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,251][51903] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,263][51876] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,344][51877] Decorrelating experience for 128 frames... -[2024-07-05 12:14:55,376][51898] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,438][51879] Decorrelating experience for 160 frames... -[2024-07-05 12:14:55,569][51875] Decorrelating experience for 224 frames... -[2024-07-05 12:14:55,698][51878] Decorrelating experience for 128 frames... -[2024-07-05 12:14:55,714][51877] Decorrelating experience for 160 frames... -[2024-07-05 12:14:55,729][51902] Decorrelating experience for 160 frames... -[2024-07-05 12:14:55,817][51879] Decorrelating experience for 192 frames... -[2024-07-05 12:14:55,856][51899] Decorrelating experience for 192 frames... -[2024-07-05 12:14:55,867][25826] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 150011904. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 12:14:55,868][25826] Avg episode reward: [(0, '0.448')] -[2024-07-05 12:14:56,093][51877] Decorrelating experience for 192 frames... -[2024-07-05 12:14:56,108][51902] Decorrelating experience for 192 frames... -[2024-07-05 12:14:56,136][51878] Decorrelating experience for 160 frames... -[2024-07-05 12:14:56,266][51879] Decorrelating experience for 224 frames... -[2024-07-05 12:14:56,310][51899] Decorrelating experience for 224 frames... -[2024-07-05 12:14:56,451][51853] Signal inference workers to stop experience collection... -[2024-07-05 12:14:56,455][51873] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 12:14:56,577][51878] Decorrelating experience for 192 frames... -[2024-07-05 12:14:56,577][51877] Decorrelating experience for 224 frames... -[2024-07-05 12:14:56,577][51902] Decorrelating experience for 224 frames... -[2024-07-05 12:14:56,896][51878] Decorrelating experience for 224 frames... -[2024-07-05 12:14:59,886][51853] Signal inference workers to resume experience collection... -[2024-07-05 12:14:59,889][51873] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 12:15:00,878][25826] Fps is (10 sec: 818.5, 60 sec: 818.5, 300 sec: 818.5). Total num frames: 150020096. Throughput: 0: 553.5. Samples: 5540. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:15:00,924][25826] Avg episode reward: [(0, '1.310')] -[2024-07-05 12:15:04,748][25826] Heartbeat connected on Batcher_0 -[2024-07-05 12:15:04,801][25826] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 12:15:04,860][25826] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 12:15:04,928][25826] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 12:15:04,977][25826] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 12:15:05,021][25826] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 12:15:05,084][25826] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 12:15:05,116][25826] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 12:15:05,177][25826] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 12:15:05,223][25826] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 12:15:05,277][25826] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 12:15:05,328][25826] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 12:15:05,375][25826] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 12:15:05,433][25826] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 12:15:05,489][25826] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 12:15:05,583][25826] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 12:15:05,669][25826] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 12:15:05,888][25826] Fps is (10 sec: 818.0, 60 sec: 545.6, 300 sec: 545.6). Total num frames: 150020096. Throughput: 0: 369.0. Samples: 5540. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:15:06,096][25826] Avg episode reward: [(0, '1.310')] -[2024-07-05 12:15:09,115][25826] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 12:15:10,875][25826] Fps is (10 sec: 0.0, 60 sec: 409.5, 300 sec: 409.5). Total num frames: 150020096. Throughput: 0: 284.3. Samples: 5688. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:15:10,931][25826] Avg episode reward: [(0, '1.556')] -[2024-07-05 12:15:15,855][25826] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 12:15:15,868][25826] Fps is (10 sec: 820.4, 60 sec: 655.4, 300 sec: 655.4). Total num frames: 150028288. Throughput: 0: 237.8. Samples: 5944. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:15:15,869][25826] Avg episode reward: [(0, '1.556')] -[2024-07-05 12:15:20,870][25826] Fps is (10 sec: 3278.0, 60 sec: 1365.2, 300 sec: 1365.2). Total num frames: 150052864. Throughput: 0: 334.6. Samples: 10040. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) -[2024-07-05 12:15:20,883][25826] Avg episode reward: [(0, '2.260')] -[2024-07-05 12:15:24,615][51873] Updated weights for policy 0, policy_version 20764 (0.0215) -[2024-07-05 12:15:25,870][25826] Fps is (10 sec: 8190.5, 60 sec: 2808.5, 300 sec: 2808.5). Total num frames: 150110208. Throughput: 0: 592.3. Samples: 20732. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) -[2024-07-05 12:15:25,880][25826] Avg episode reward: [(0, '6.944')] -[2024-07-05 12:15:30,872][25826] Fps is (10 sec: 8190.5, 60 sec: 3071.7, 300 sec: 3071.7). Total num frames: 150134784. Throughput: 0: 843.9. Samples: 33760. Policy #0 lag: (min: 0.0, avg: 1.4, max: 2.0) -[2024-07-05 12:15:30,892][25826] Avg episode reward: [(0, '7.868')] -[2024-07-05 12:15:35,870][25826] Fps is (10 sec: 5734.5, 60 sec: 3458.7, 300 sec: 3458.7). Total num frames: 150167552. Throughput: 0: 955.2. Samples: 42984. Policy #0 lag: (min: 0.0, avg: 2.0, max: 3.0) -[2024-07-05 12:15:35,884][25826] Avg episode reward: [(0, '13.215')] -[2024-07-05 12:15:36,903][51873] Updated weights for policy 0, policy_version 20774 (0.0072) -[2024-07-05 12:15:40,876][25826] Fps is (10 sec: 4094.6, 60 sec: 3276.3, 300 sec: 3276.3). Total num frames: 150175744. Throughput: 0: 980.5. Samples: 44128. Policy #0 lag: (min: 0.0, avg: 2.0, max: 3.0) -[2024-07-05 12:15:40,923][25826] Avg episode reward: [(0, '14.277')] -[2024-07-05 12:15:45,873][25826] Fps is (10 sec: 818.9, 60 sec: 2978.7, 300 sec: 2978.7). Total num frames: 150175744. Throughput: 0: 877.5. Samples: 45024. Policy #0 lag: (min: 0.0, avg: 2.0, max: 3.0) -[2024-07-05 12:15:45,898][25826] Avg episode reward: [(0, '14.375')] -[2024-07-05 12:15:50,869][25826] Fps is (10 sec: 819.7, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 150183936. Throughput: 0: 906.2. Samples: 46304. Policy #0 lag: (min: 0.0, avg: 2.0, max: 3.0) -[2024-07-05 12:15:50,891][25826] Avg episode reward: [(0, '14.375')] -[2024-07-05 12:15:55,870][25826] Fps is (10 sec: 1638.8, 60 sec: 3003.6, 300 sec: 2772.6). Total num frames: 150192128. Throughput: 0: 922.6. Samples: 47200. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:15:55,885][25826] Avg episode reward: [(0, '14.674')] -[2024-07-05 12:16:00,871][25826] Fps is (10 sec: 6554.0, 60 sec: 3823.5, 300 sec: 3393.8). Total num frames: 150249472. Throughput: 0: 1141.2. Samples: 57296. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:16:00,902][25826] Avg episode reward: [(0, '41.736')] -[2024-07-05 12:16:00,968][51873] Updated weights for policy 0, policy_version 20784 (0.0106) -[2024-07-05 12:16:05,872][25826] Fps is (10 sec: 10647.3, 60 sec: 4642.9, 300 sec: 3822.7). Total num frames: 150298624. Throughput: 0: 1463.8. Samples: 75916. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:16:05,897][25826] Avg episode reward: [(0, '50.913')] -[2024-07-05 12:16:10,025][51873] Updated weights for policy 0, policy_version 20794 (0.0053) -[2024-07-05 12:16:10,873][25826] Fps is (10 sec: 9008.1, 60 sec: 5325.0, 300 sec: 4095.8). Total num frames: 150339584. Throughput: 0: 1336.7. Samples: 80884. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-07-05 12:16:10,891][25826] Avg episode reward: [(0, '51.100')] -[2024-07-05 12:16:15,870][25826] Fps is (10 sec: 4916.5, 60 sec: 5324.6, 300 sec: 3951.4). Total num frames: 150347776. Throughput: 0: 1167.9. Samples: 86312. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-07-05 12:16:15,879][25826] Avg episode reward: [(0, '51.023')] -[2024-07-05 12:16:20,872][25826] Fps is (10 sec: 3276.7, 60 sec: 5324.6, 300 sec: 4004.8). Total num frames: 150372352. Throughput: 0: 993.6. Samples: 87700. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-07-05 12:16:20,889][25826] Avg episode reward: [(0, '50.530')] -[2024-07-05 12:16:25,869][25826] Fps is (10 sec: 3277.3, 60 sec: 4505.7, 300 sec: 3880.4). Total num frames: 150380544. Throughput: 0: 1116.7. Samples: 94372. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) -[2024-07-05 12:16:25,878][25826] Avg episode reward: [(0, '50.413')] -[2024-07-05 12:16:30,869][25826] Fps is (10 sec: 2458.5, 60 sec: 4369.3, 300 sec: 3850.2). Total num frames: 150396928. Throughput: 0: 1200.0. Samples: 99020. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:16:30,882][25826] Avg episode reward: [(0, '48.997')] -[2024-07-05 12:16:33,544][51873] Updated weights for policy 0, policy_version 20804 (0.0082) -[2024-07-05 12:16:35,872][25826] Fps is (10 sec: 4094.5, 60 sec: 4232.4, 300 sec: 3900.8). Total num frames: 150421504. Throughput: 0: 1278.8. Samples: 103856. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:16:35,887][25826] Avg episode reward: [(0, '49.244')] -[2024-07-05 12:16:40,872][25826] Fps is (10 sec: 2456.7, 60 sec: 4096.2, 300 sec: 3723.5). Total num frames: 150421504. Throughput: 0: 1299.3. Samples: 105672. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:16:40,884][25826] Avg episode reward: [(0, '48.933')] -[2024-07-05 12:16:40,906][51853] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020804_150421504.pth... -[2024-07-05 12:16:41,211][51853] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020091_144580608.pth -[2024-07-05 12:16:45,871][25826] Fps is (10 sec: 0.0, 60 sec: 4096.1, 300 sec: 3561.6). Total num frames: 150421504. Throughput: 0: 1097.7. Samples: 106696. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:16:45,946][25826] Avg episode reward: [(0, '48.933')] -[2024-07-05 12:16:50,869][25826] Fps is (10 sec: 819.5, 60 sec: 4096.0, 300 sec: 3481.6). Total num frames: 150429696. Throughput: 0: 692.1. Samples: 107056. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:16:50,913][25826] Avg episode reward: [(0, '48.776')] -[2024-07-05 12:16:55,874][25826] Fps is (10 sec: 819.0, 60 sec: 3959.2, 300 sec: 3342.2). Total num frames: 150429696. Throughput: 0: 596.3. Samples: 107720. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:16:55,906][25826] Avg episode reward: [(0, '48.506')] -[2024-07-05 12:17:00,872][25826] Fps is (10 sec: 0.0, 60 sec: 3003.6, 300 sec: 3213.7). Total num frames: 150429696. Throughput: 0: 483.7. Samples: 108080. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:17:00,942][25826] Avg episode reward: [(0, '48.389')] -[2024-07-05 12:17:05,874][25826] Fps is (10 sec: 819.2, 60 sec: 2321.0, 300 sec: 3155.3). Total num frames: 150437888. Throughput: 0: 458.6. Samples: 108336. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:17:05,898][25826] Avg episode reward: [(0, '48.412')] -[2024-07-05 12:17:10,878][25826] Fps is (10 sec: 818.9, 60 sec: 1638.3, 300 sec: 3042.6). Total num frames: 150437888. Throughput: 0: 319.3. Samples: 108744. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:17:10,916][25826] Avg episode reward: [(0, '48.412')] -[2024-07-05 12:17:14,742][51873] Updated weights for policy 0, policy_version 20814 (0.0124) -[2024-07-05 12:17:16,928][51873] Updated weights for policy 0, policy_version 20824 (0.0009) -[2024-07-05 12:17:19,014][51873] Updated weights for policy 0, policy_version 20834 (0.0009) -[2024-07-05 12:17:21,174][51873] Updated weights for policy 0, policy_version 20844 (0.0012) -[2024-07-05 12:17:23,294][51873] Updated weights for policy 0, policy_version 20854 (0.0009) -[2024-07-05 12:17:25,424][51873] Updated weights for policy 0, policy_version 20864 (0.0010) -[2024-07-05 12:17:27,567][51873] Updated weights for policy 0, policy_version 20874 (0.0010) -[2024-07-05 12:17:29,699][51873] Updated weights for policy 0, policy_version 20884 (0.0011) -[2024-07-05 12:17:31,923][51873] Updated weights for policy 0, policy_version 20894 (0.0008) -[2024-07-05 12:17:34,023][51873] Updated weights for policy 0, policy_version 20904 (0.0014) -[2024-07-05 12:17:36,143][51873] Updated weights for policy 0, policy_version 20914 (0.0010) -[2024-07-05 12:17:38,202][51873] Updated weights for policy 0, policy_version 20924 (0.0008) -[2024-07-05 12:17:40,338][51873] Updated weights for policy 0, policy_version 20934 (0.0013) -[2024-07-05 12:17:42,510][51873] Updated weights for policy 0, policy_version 20944 (0.0009) -[2024-07-05 12:17:44,686][51873] Updated weights for policy 0, policy_version 20954 (0.0012) -[2024-07-05 12:17:46,891][51873] Updated weights for policy 0, policy_version 20964 (0.0009) -[2024-07-05 12:17:49,163][51873] Updated weights for policy 0, policy_version 20974 (0.0010) -[2024-07-05 12:17:49,547][51904] EvtLoop [rollout_proc14_evt_loop, process=rollout_proc14] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance14'), args=(1, 0) -Traceback (most recent call last): - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal - slot_callable(*args) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts - complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts - new_obs, rewards, terminated, truncated, infos = e.step(actions) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 461, in step - return self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py", line 129, in step - obs, rew, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py", line 115, in step - obs, rew, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step - observation, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 522, in step - observation, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py", line 86, in step - obs, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 461, in step - return self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step - obs, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step - reward = self.game.make_action(actions_flattened, self.skip_frames) -vizdoom.vizdoom.SignalException: Signal SIGTERM received. ViZDoom instance has been closed. -[2024-07-05 12:17:49,549][51895] EvtLoop [rollout_proc9_evt_loop, process=rollout_proc9] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance9'), args=(1, 0) -Traceback (most recent call last): - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal - slot_callable(*args) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts - complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts - new_obs, rewards, terminated, truncated, infos = e.step(actions) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 461, in step - return self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py", line 129, in step - obs, rew, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py", line 115, in step - obs, rew, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step - observation, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 522, in step - observation, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py", line 86, in step - obs, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 461, in step - return self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step - obs, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step - reward = self.game.make_action(actions_flattened, self.skip_frames) -vizdoom.vizdoom.SignalException: Signal SIGTERM received. ViZDoom instance has been closed. -[2024-07-05 12:17:49,551][51895] Unhandled exception Signal SIGTERM received. ViZDoom instance has been closed. in evt loop rollout_proc9_evt_loop -[2024-07-05 12:17:49,551][51904] Unhandled exception Signal SIGTERM received. ViZDoom instance has been closed. in evt loop rollout_proc14_evt_loop -[2024-07-05 12:17:49,553][51899] EvtLoop [rollout_proc10_evt_loop, process=rollout_proc10] unhandled exception in slot='advance_rollouts' connected to emitter=Emitter(object_id='InferenceWorker_p0-w0', signal_name='advance10'), args=(1, 0) +[2024-07-05 10:23:50,059][11866] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:23:50,059][11866] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-07-05 10:23:50,112][11866] Num visible devices: 1 +[2024-07-05 10:23:50,193][11846] Using optimizer +[2024-07-05 10:23:50,713][11846] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth... +[2024-07-05 10:23:50,798][11846] Loading model from checkpoint +[2024-07-05 10:23:50,800][11846] Loaded experiment state at self.train_step=1222, self.env_steps=5005312 +[2024-07-05 10:23:50,800][11846] Initialized policy 0 weights for model version 1222 +[2024-07-05 10:23:50,802][11846] LearnerWorker_p0 finished initialization! +[2024-07-05 10:23:50,802][11846] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:23:50,895][11866] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:23:50,895][11866] RunningMeanStd input shape: (1,) +[2024-07-05 10:23:50,902][11866] Num input channels: 3 +[2024-07-05 10:23:50,913][11866] Convolutional layer output size: 4608 +[2024-07-05 10:23:50,924][11866] Policy head output size: 512 +[2024-07-05 10:23:51,053][11302] Inference worker 0-0 is ready! +[2024-07-05 10:23:51,054][11302] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 10:23:51,133][11868] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,145][11874] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,153][11872] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,160][11895] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,163][11871] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,167][11878] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,167][11867] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,167][11873] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,168][11870] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,169][11897] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,170][11894] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,172][11875] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,175][11876] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,180][11896] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,181][11869] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,260][11877] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:23:51,899][11871] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11873] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11868] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11867] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11895] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11894] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11870] Decorrelating experience for 0 frames... +[2024-07-05 10:23:51,899][11869] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,086][11894] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,113][11876] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,147][11873] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,156][11867] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,156][11868] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,157][11869] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,157][11870] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,180][11874] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,279][11894] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,329][11871] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,340][11897] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,341][11878] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,344][11873] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,349][11869] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,354][11867] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,495][11876] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,512][11871] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,512][11897] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,523][11894] Decorrelating experience for 96 frames... +[2024-07-05 10:23:52,609][11867] Decorrelating experience for 96 frames... +[2024-07-05 10:23:52,609][11868] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,691][11873] Decorrelating experience for 96 frames... +[2024-07-05 10:23:52,701][11896] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,796][11894] Decorrelating experience for 128 frames... +[2024-07-05 10:23:52,808][11870] Decorrelating experience for 64 frames... +[2024-07-05 10:23:52,808][11871] Decorrelating experience for 96 frames... +[2024-07-05 10:23:52,880][11896] Decorrelating experience for 32 frames... +[2024-07-05 10:23:52,882][11867] Decorrelating experience for 128 frames... +[2024-07-05 10:23:52,886][11875] Decorrelating experience for 0 frames... +[2024-07-05 10:23:52,989][11876] Decorrelating experience for 64 frames... +[2024-07-05 10:23:53,015][11870] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,070][11873] Decorrelating experience for 128 frames... +[2024-07-05 10:23:53,080][11896] Decorrelating experience for 64 frames... +[2024-07-05 10:23:53,081][11872] Decorrelating experience for 0 frames... +[2024-07-05 10:23:53,124][11867] Decorrelating experience for 160 frames... +[2024-07-05 10:23:53,151][11874] Decorrelating experience for 32 frames... +[2024-07-05 10:23:53,185][11875] Decorrelating experience for 32 frames... +[2024-07-05 10:23:53,250][11894] Decorrelating experience for 160 frames... +[2024-07-05 10:23:53,317][11896] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,350][11868] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,377][11878] Decorrelating experience for 32 frames... +[2024-07-05 10:23:53,396][11876] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,409][11875] Decorrelating experience for 64 frames... +[2024-07-05 10:23:53,427][11871] Decorrelating experience for 128 frames... +[2024-07-05 10:23:53,433][11874] Decorrelating experience for 64 frames... +[2024-07-05 10:23:53,550][11867] Decorrelating experience for 192 frames... +[2024-07-05 10:23:53,585][11894] Decorrelating experience for 192 frames... +[2024-07-05 10:23:53,657][11869] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,663][11896] Decorrelating experience for 128 frames... +[2024-07-05 10:23:53,663][11875] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,692][11872] Decorrelating experience for 32 frames... +[2024-07-05 10:23:53,694][11874] Decorrelating experience for 96 frames... +[2024-07-05 10:23:53,713][11895] Decorrelating experience for 32 frames... +[2024-07-05 10:23:53,786][11302] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 5005312. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:23:53,855][11878] Decorrelating experience for 64 frames... +[2024-07-05 10:23:53,893][11876] Decorrelating experience for 128 frames... +[2024-07-05 10:23:53,929][11894] Decorrelating experience for 224 frames... +[2024-07-05 10:23:53,955][11867] Decorrelating experience for 224 frames... +[2024-07-05 10:23:53,975][11869] Decorrelating experience for 128 frames... +[2024-07-05 10:23:53,987][11872] Decorrelating experience for 64 frames... +[2024-07-05 10:23:53,993][11871] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,000][11868] Decorrelating experience for 128 frames... +[2024-07-05 10:23:54,138][11895] Decorrelating experience for 64 frames... +[2024-07-05 10:23:54,151][11896] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,156][11876] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,192][11874] Decorrelating experience for 128 frames... +[2024-07-05 10:23:54,251][11869] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,260][11875] Decorrelating experience for 128 frames... +[2024-07-05 10:23:54,301][11868] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,376][11871] Decorrelating experience for 192 frames... +[2024-07-05 10:23:54,378][11878] Decorrelating experience for 96 frames... +[2024-07-05 10:23:54,381][11895] Decorrelating experience for 96 frames... +[2024-07-05 10:23:54,476][11897] Decorrelating experience for 64 frames... +[2024-07-05 10:23:54,577][11896] Decorrelating experience for 192 frames... +[2024-07-05 10:23:54,633][11874] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,635][11870] Decorrelating experience for 128 frames... +[2024-07-05 10:23:54,649][11868] Decorrelating experience for 192 frames... +[2024-07-05 10:23:54,702][11872] Decorrelating experience for 96 frames... +[2024-07-05 10:23:54,739][11895] Decorrelating experience for 128 frames... +[2024-07-05 10:23:54,781][11897] Decorrelating experience for 96 frames... +[2024-07-05 10:23:54,789][11878] Decorrelating experience for 128 frames... +[2024-07-05 10:23:54,879][11876] Decorrelating experience for 192 frames... +[2024-07-05 10:23:54,936][11877] Decorrelating experience for 0 frames... +[2024-07-05 10:23:54,974][11870] Decorrelating experience for 160 frames... +[2024-07-05 10:23:54,980][11868] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,026][11874] Decorrelating experience for 192 frames... +[2024-07-05 10:23:55,042][11895] Decorrelating experience for 160 frames... +[2024-07-05 10:23:55,061][11872] Decorrelating experience for 128 frames... +[2024-07-05 10:23:55,062][11896] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,174][11878] Decorrelating experience for 160 frames... +[2024-07-05 10:23:55,186][11869] Decorrelating experience for 192 frames... +[2024-07-05 10:23:55,218][11877] Decorrelating experience for 32 frames... +[2024-07-05 10:23:55,244][11873] Decorrelating experience for 160 frames... +[2024-07-05 10:23:55,287][11876] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,371][11870] Decorrelating experience for 192 frames... +[2024-07-05 10:23:55,436][11874] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,482][11897] Decorrelating experience for 128 frames... +[2024-07-05 10:23:55,483][11877] Decorrelating experience for 64 frames... +[2024-07-05 10:23:55,532][11871] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,605][11869] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,637][11873] Decorrelating experience for 192 frames... +[2024-07-05 10:23:55,700][11895] Decorrelating experience for 192 frames... +[2024-07-05 10:23:55,805][11870] Decorrelating experience for 224 frames... +[2024-07-05 10:23:55,820][11875] Decorrelating experience for 160 frames... +[2024-07-05 10:23:55,824][11897] Decorrelating experience for 160 frames... +[2024-07-05 10:23:55,836][11877] Decorrelating experience for 96 frames... +[2024-07-05 10:23:55,884][11878] Decorrelating experience for 192 frames... +[2024-07-05 10:23:55,993][11873] Decorrelating experience for 224 frames... +[2024-07-05 10:23:56,041][11895] Decorrelating experience for 224 frames... +[2024-07-05 10:23:56,184][11897] Decorrelating experience for 192 frames... +[2024-07-05 10:23:56,191][11872] Decorrelating experience for 160 frames... +[2024-07-05 10:23:56,251][11875] Decorrelating experience for 192 frames... +[2024-07-05 10:23:56,289][11878] Decorrelating experience for 224 frames... +[2024-07-05 10:23:56,349][11877] Decorrelating experience for 128 frames... +[2024-07-05 10:23:56,565][11897] Decorrelating experience for 224 frames... +[2024-07-05 10:23:56,607][11872] Decorrelating experience for 192 frames... +[2024-07-05 10:23:56,893][11877] Decorrelating experience for 160 frames... +[2024-07-05 10:23:56,904][11875] Decorrelating experience for 224 frames... +[2024-07-05 10:23:56,960][11846] Signal inference workers to stop experience collection... +[2024-07-05 10:23:56,973][11866] InferenceWorker_p0-w0: stopping experience collection +[2024-07-05 10:23:57,111][11872] Decorrelating experience for 224 frames... +[2024-07-05 10:23:57,199][11877] Decorrelating experience for 192 frames... +[2024-07-05 10:23:57,405][11877] Decorrelating experience for 224 frames... +[2024-07-05 10:23:58,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 1212.8. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:23:58,787][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:23:58,841][11846] EvtLoop [learner_proc0_evt_loop, process=learner_proc0] unhandled exception in slot='on_new_training_batch' connected to emitter=Emitter(object_id='Batcher_0', signal_name='training_batches_available'), args=(0,) Traceback (most recent call last): File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal slot_callable(*args) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/sampling/rollout_worker.py", line 241, in advance_rollouts - complete_rollouts, episodic_stats = runner.advance_rollouts(policy_id, self.timing) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 634, in advance_rollouts - new_obs, rewards, terminated, truncated, infos = e.step(actions) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 461, in step - return self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py", line 129, in step - obs, rew, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/utils/make_env.py", line 115, in step - obs, rew, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 33, in step - observation, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 522, in step - observation, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/envs/env_wrappers.py", line 86, in step - obs, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/gymnasium/core.py", line 461, in step - return self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 54, in step - obs, reward, terminated, truncated, info = self.env.step(action) - File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 452, in step - reward = self.game.make_action(actions_flattened, self.skip_frames) -vizdoom.vizdoom.SignalException: Signal SIGTERM received. ViZDoom instance has been closed. -[2024-07-05 12:17:49,554][51899] Unhandled exception Signal SIGTERM received. ViZDoom instance has been closed. in evt loop rollout_proc10_evt_loop -[2024-07-05 12:21:40,384][05794] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 12:21:40,386][05794] Rollout worker 0 uses device cpu -[2024-07-05 12:21:40,386][05794] Rollout worker 1 uses device cpu -[2024-07-05 12:21:40,386][05794] Rollout worker 2 uses device cpu -[2024-07-05 12:21:40,387][05794] Rollout worker 3 uses device cpu -[2024-07-05 12:21:40,387][05794] Rollout worker 4 uses device cpu -[2024-07-05 12:21:40,387][05794] Rollout worker 5 uses device cpu -[2024-07-05 12:21:40,388][05794] Rollout worker 6 uses device cpu -[2024-07-05 12:21:40,388][05794] Rollout worker 7 uses device cpu -[2024-07-05 12:21:40,389][05794] Rollout worker 8 uses device cpu -[2024-07-05 12:21:40,389][05794] Rollout worker 9 uses device cpu -[2024-07-05 12:21:40,389][05794] Rollout worker 10 uses device cpu -[2024-07-05 12:21:40,390][05794] Rollout worker 11 uses device cpu -[2024-07-05 12:21:40,390][05794] Rollout worker 12 uses device cpu -[2024-07-05 12:21:40,390][05794] Rollout worker 13 uses device cpu -[2024-07-05 12:21:40,391][05794] Rollout worker 14 uses device cpu -[2024-07-05 12:21:40,391][05794] Rollout worker 15 uses device cpu -[2024-07-05 12:21:40,501][05794] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:21:40,502][05794] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 12:21:40,575][05794] Starting all processes... -[2024-07-05 12:21:40,577][05794] Starting process learner_proc0 -[2024-07-05 12:21:41,238][05794] Starting all processes... -[2024-07-05 12:21:41,245][05794] Starting process inference_proc0-0 -[2024-07-05 12:21:41,246][05794] Starting process rollout_proc0 -[2024-07-05 12:21:41,246][05794] Starting process rollout_proc1 -[2024-07-05 12:21:41,247][05794] Starting process rollout_proc2 -[2024-07-05 12:21:41,247][05794] Starting process rollout_proc3 -[2024-07-05 12:21:41,247][05794] Starting process rollout_proc4 -[2024-07-05 12:21:41,247][05794] Starting process rollout_proc5 -[2024-07-05 12:21:41,248][05794] Starting process rollout_proc6 -[2024-07-05 12:21:41,248][05794] Starting process rollout_proc7 -[2024-07-05 12:21:41,248][05794] Starting process rollout_proc8 -[2024-07-05 12:21:41,248][05794] Starting process rollout_proc9 -[2024-07-05 12:21:41,248][05794] Starting process rollout_proc10 -[2024-07-05 12:21:41,253][05794] Starting process rollout_proc11 -[2024-07-05 12:21:41,255][05794] Starting process rollout_proc12 -[2024-07-05 12:21:41,256][05794] Starting process rollout_proc13 -[2024-07-05 12:21:41,256][05794] Starting process rollout_proc14 -[2024-07-05 12:21:41,299][05794] Starting process rollout_proc15 -[2024-07-05 12:21:45,349][06031] Worker 9 uses CPU cores [9] -[2024-07-05 12:21:45,352][06024] Worker 2 uses CPU cores [2] -[2024-07-05 12:21:45,473][06030] Worker 8 uses CPU cores [8] -[2024-07-05 12:21:45,493][06027] Worker 5 uses CPU cores [5] -[2024-07-05 12:21:45,667][06001] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:21:45,668][06001] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 12:21:45,693][06034] Worker 12 uses CPU cores [12] -[2024-07-05 12:21:45,729][06022] Worker 1 uses CPU cores [1] -[2024-07-05 12:21:45,732][06001] Num visible devices: 1 -[2024-07-05 12:21:45,770][06001] Setting fixed seed 200 -[2024-07-05 12:21:45,781][06001] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:21:45,781][06001] Initializing actor-critic model on device cuda:0 -[2024-07-05 12:21:45,782][06001] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:21:45,783][06001] RunningMeanStd input shape: (1,) -[2024-07-05 12:21:45,793][06026] Worker 4 uses CPU cores [4] -[2024-07-05 12:21:45,794][06001] ConvEncoder: input_channels=3 -[2024-07-05 12:21:45,799][06035] Worker 15 uses CPU cores [15] -[2024-07-05 12:21:45,826][06028] Worker 7 uses CPU cores [7] -[2024-07-05 12:21:45,833][06033] Worker 11 uses CPU cores [11] -[2024-07-05 12:21:45,837][06023] Worker 0 uses CPU cores [0] -[2024-07-05 12:21:45,899][06021] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:21:45,899][06021] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 12:21:45,908][06001] Conv encoder output size: 512 -[2024-07-05 12:21:45,908][06001] Policy head output size: 512 -[2024-07-05 12:21:45,928][06001] Created Actor Critic model with architecture: -[2024-07-05 12:21:45,929][06001] ActorCriticSharedWeights( + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/learning/learner_worker.py", line 150, in on_new_training_batch + stats = self.learner.train(self.batcher.training_batches[batch_idx]) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/learning/learner.py", line 1046, in train + train_stats = self._train(buff, self.cfg.batch_size, experience_size, num_invalids) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/learning/learner.py", line 731, in _train + ) = self._calculate_losses(mb, num_invalids) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/algo/learning/learner.py", line 572, in _calculate_losses + core_output_seq, _ = self.actor_critic.forward_core(head_output_seq, rnn_states) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/model/actor_critic.py", line 159, in forward_core + x, new_rnn_states = self.core(head_output, rnn_states) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl + return forward_call(*args, **kwargs) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/sample_factory/model/core.py", line 49, in forward + x, new_rnn_states = self.core(head_output, rnn_states.contiguous()) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl + return forward_call(*args, **kwargs) + File "/home/raghu/anaconda3/envs/rl/lib/python3.10/site-packages/torch/nn/modules/rnn.py", line 1136, in forward + result = _VF.gru(input, batch_sizes, hx, self._flat_weights, self.bias, +torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 28.00 MiB. GPU +[2024-07-05 10:23:58,842][11846] Unhandled exception CUDA out of memory. Tried to allocate 28.00 MiB. GPU in evt loop learner_proc0_evt_loop +[2024-07-05 10:24:03,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 606.4. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:03,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:04,365][11302] Heartbeat connected on Batcher_0 +[2024-07-05 10:24:04,372][11302] Heartbeat connected on InferenceWorker_p0-w0 +[2024-07-05 10:24:04,375][11302] Heartbeat connected on RolloutWorker_w0 +[2024-07-05 10:24:04,378][11302] Heartbeat connected on RolloutWorker_w1 +[2024-07-05 10:24:04,381][11302] Heartbeat connected on RolloutWorker_w2 +[2024-07-05 10:24:04,384][11302] Heartbeat connected on RolloutWorker_w3 +[2024-07-05 10:24:04,387][11302] Heartbeat connected on RolloutWorker_w4 +[2024-07-05 10:24:04,390][11302] Heartbeat connected on RolloutWorker_w5 +[2024-07-05 10:24:04,393][11302] Heartbeat connected on RolloutWorker_w6 +[2024-07-05 10:24:04,399][11302] Heartbeat connected on RolloutWorker_w8 +[2024-07-05 10:24:04,402][11302] Heartbeat connected on RolloutWorker_w7 +[2024-07-05 10:24:04,447][11302] Heartbeat connected on RolloutWorker_w10 +[2024-07-05 10:24:04,448][11302] Heartbeat connected on RolloutWorker_w9 +[2024-07-05 10:24:04,449][11302] Heartbeat connected on RolloutWorker_w11 +[2024-07-05 10:24:04,452][11302] Heartbeat connected on RolloutWorker_w12 +[2024-07-05 10:24:04,455][11302] Heartbeat connected on RolloutWorker_w13 +[2024-07-05 10:24:04,458][11302] Heartbeat connected on RolloutWorker_w14 +[2024-07-05 10:24:04,460][11302] Heartbeat connected on RolloutWorker_w15 +[2024-07-05 10:24:08,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 404.2. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:08,789][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:13,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 303.2. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:13,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:18,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 242.6. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:18,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:23,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 202.1. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:23,787][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:28,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 173.3. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:28,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:33,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 151.6. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:33,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:38,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 134.8. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:38,789][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:43,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:43,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:48,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:48,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:53,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:53,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:24:58,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:24:58,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:03,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:03,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:08,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:08,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:13,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:13,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:18,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:18,787][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:23,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:23,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:28,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:28,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:33,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:33,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:38,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:38,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:43,786][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:43,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:48,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:48,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:53,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:53,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:25:58,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:25:58,789][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:26:03,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:26:03,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:26:08,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:26:08,789][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:26:13,787][11302] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 0.0. Samples: 6064. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:26:13,788][11302] Avg episode reward: [(0, '2.206')] +[2024-07-05 10:26:16,861][11302] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 11302], exiting... +[2024-07-05 10:26:16,863][11846] Stopping Batcher_0... +[2024-07-05 10:26:16,864][11846] Loop batcher_evt_loop terminating... +[2024-07-05 10:26:16,863][11302] Runner profile tree view: +main_loop: 152.4026 +[2024-07-05 10:26:16,872][11302] Collected {0: 5005312}, FPS: 0.0 +[2024-07-05 10:26:16,898][11876] Stopping RolloutWorker_w10... +[2024-07-05 10:26:16,899][11872] Stopping RolloutWorker_w5... +[2024-07-05 10:26:16,899][11876] Loop rollout_proc10_evt_loop terminating... +[2024-07-05 10:26:16,899][11872] Loop rollout_proc5_evt_loop terminating... +[2024-07-05 10:26:16,899][11895] Stopping RolloutWorker_w13... +[2024-07-05 10:26:16,900][11895] Loop rollout_proc13_evt_loop terminating... +[2024-07-05 10:26:16,900][11870] Stopping RolloutWorker_w3... +[2024-07-05 10:26:16,901][11877] Stopping RolloutWorker_w9... +[2024-07-05 10:26:16,901][11870] Loop rollout_proc3_evt_loop terminating... +[2024-07-05 10:26:16,902][11877] Loop rollout_proc9_evt_loop terminating... +[2024-07-05 10:26:16,902][11868] Stopping RolloutWorker_w1... +[2024-07-05 10:26:16,902][11871] Stopping RolloutWorker_w4... +[2024-07-05 10:26:16,903][11871] Loop rollout_proc4_evt_loop terminating... +[2024-07-05 10:26:16,903][11868] Loop rollout_proc1_evt_loop terminating... +[2024-07-05 10:26:16,903][11874] Stopping RolloutWorker_w7... +[2024-07-05 10:26:16,904][11874] Loop rollout_proc7_evt_loop terminating... +[2024-07-05 10:26:16,904][11894] Stopping RolloutWorker_w12... +[2024-07-05 10:26:16,905][11875] Stopping RolloutWorker_w8... +[2024-07-05 10:26:16,905][11894] Loop rollout_proc12_evt_loop terminating... +[2024-07-05 10:26:16,905][11875] Loop rollout_proc8_evt_loop terminating... +[2024-07-05 10:26:16,906][11896] Stopping RolloutWorker_w14... +[2024-07-05 10:26:16,907][11896] Loop rollout_proc14_evt_loop terminating... +[2024-07-05 10:26:16,915][11869] Stopping RolloutWorker_w2... +[2024-07-05 10:26:16,916][11869] Loop rollout_proc2_evt_loop terminating... +[2024-07-05 10:26:16,917][11867] Stopping RolloutWorker_w0... +[2024-07-05 10:26:16,918][11867] Loop rollout_proc0_evt_loop terminating... +[2024-07-05 10:26:16,918][11873] Stopping RolloutWorker_w6... +[2024-07-05 10:26:16,919][11873] Loop rollout_proc6_evt_loop terminating... +[2024-07-05 10:26:16,921][11897] Stopping RolloutWorker_w15... +[2024-07-05 10:26:16,921][11897] Loop rollout_proc15_evt_loop terminating... +[2024-07-05 10:26:16,983][11878] Stopping RolloutWorker_w11... +[2024-07-05 10:26:16,997][11878] Loop rollout_proc11_evt_loop terminating... +[2024-07-05 10:26:17,001][11866] Weights refcount: 2 0 +[2024-07-05 10:26:17,007][11866] Stopping InferenceWorker_p0-w0... +[2024-07-05 10:26:17,009][11866] Loop inference_proc0-0_evt_loop terminating... +[2024-07-05 10:29:01,075][17621] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json... +[2024-07-05 10:29:01,077][17621] Rollout worker 0 uses device cpu +[2024-07-05 10:29:01,078][17621] Rollout worker 1 uses device cpu +[2024-07-05 10:29:01,079][17621] Rollout worker 2 uses device cpu +[2024-07-05 10:29:01,080][17621] Rollout worker 3 uses device cpu +[2024-07-05 10:29:01,080][17621] Rollout worker 4 uses device cpu +[2024-07-05 10:29:01,081][17621] Rollout worker 5 uses device cpu +[2024-07-05 10:29:01,081][17621] Rollout worker 6 uses device cpu +[2024-07-05 10:29:01,082][17621] Rollout worker 7 uses device cpu +[2024-07-05 10:29:01,127][17621] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:29:01,128][17621] InferenceWorker_p0-w0: min num requests: 2 +[2024-07-05 10:29:01,155][17621] Starting all processes... +[2024-07-05 10:29:01,155][17621] Starting process learner_proc0 +[2024-07-05 10:29:01,854][17621] Starting all processes... +[2024-07-05 10:29:01,860][17621] Starting process inference_proc0-0 +[2024-07-05 10:29:01,861][17621] Starting process rollout_proc0 +[2024-07-05 10:29:01,861][17621] Starting process rollout_proc1 +[2024-07-05 10:29:01,861][17621] Starting process rollout_proc2 +[2024-07-05 10:29:01,862][17621] Starting process rollout_proc3 +[2024-07-05 10:29:01,862][17621] Starting process rollout_proc4 +[2024-07-05 10:29:01,862][17621] Starting process rollout_proc5 +[2024-07-05 10:29:01,863][17621] Starting process rollout_proc6 +[2024-07-05 10:29:01,864][17621] Starting process rollout_proc7 +[2024-07-05 10:29:04,561][17915] Worker 3 uses CPU cores [6, 7] +[2024-07-05 10:29:04,576][17913] Worker 1 uses CPU cores [2, 3] +[2024-07-05 10:29:04,576][17914] Worker 2 uses CPU cores [4, 5] +[2024-07-05 10:29:04,729][17898] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:29:04,729][17898] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-07-05 10:29:04,777][17898] Num visible devices: 1 +[2024-07-05 10:29:04,793][17919] Worker 7 uses CPU cores [14, 15] +[2024-07-05 10:29:04,801][17898] Setting fixed seed 200 +[2024-07-05 10:29:04,812][17898] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:29:04,812][17898] Initializing actor-critic model on device cuda:0 +[2024-07-05 10:29:04,813][17898] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:29:04,814][17898] RunningMeanStd input shape: (1,) +[2024-07-05 10:29:04,823][17898] Num input channels: 3 +[2024-07-05 10:29:04,854][17898] Convolutional layer output size: 4608 +[2024-07-05 10:29:04,867][17898] Policy head output size: 512 +[2024-07-05 10:29:04,931][17917] Worker 5 uses CPU cores [10, 11] +[2024-07-05 10:29:05,001][17898] Created Actor Critic model with architecture: +[2024-07-05 10:29:05,002][17898] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -8364,3728 +2276,67 @@ vizdoom.vizdoom.SignalException: Signal SIGTERM received. ViZDoom instance has b ) (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (basic_encoder): ResnetEncoder( + (conv_head): Sequential( + (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (2): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (3): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 12:21:45,937][06032] Worker 10 uses CPU cores [10] -[2024-07-05 12:21:45,959][06021] Num visible devices: 1 -[2024-07-05 12:21:45,980][06025] Worker 3 uses CPU cores [3] -[2024-07-05 12:21:45,985][06051] Worker 13 uses CPU cores [13] -[2024-07-05 12:21:45,996][06052] Worker 14 uses CPU cores [14] -[2024-07-05 12:21:46,029][06029] Worker 6 uses CPU cores [6] -[2024-07-05 12:21:46,069][06001] Using optimizer -[2024-07-05 12:21:46,554][06001] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020804_150421504.pth... -[2024-07-05 12:21:46,603][06001] Loading model from checkpoint -[2024-07-05 12:21:46,604][06001] Loaded experiment state at self.train_step=20804, self.env_steps=150421504 -[2024-07-05 12:21:46,605][06001] Initialized policy 0 weights for model version 20804 -[2024-07-05 12:21:46,606][06001] LearnerWorker_p0 finished initialization! -[2024-07-05 12:21:46,606][06001] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:21:46,683][06021] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:21:46,683][06021] RunningMeanStd input shape: (1,) -[2024-07-05 12:21:46,690][06021] ConvEncoder: input_channels=3 -[2024-07-05 12:21:46,743][06021] Conv encoder output size: 512 -[2024-07-05 12:21:46,744][06021] Policy head output size: 512 -[2024-07-05 12:21:46,777][05794] Inference worker 0-0 is ready! -[2024-07-05 12:21:46,777][05794] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 12:21:46,847][06027] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,851][06029] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,852][06025] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,852][06035] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,852][06052] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,854][06051] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,856][06022] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,856][06034] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,857][06024] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,860][06023] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,862][06033] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,865][06032] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,865][06028] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,866][06030] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,867][06031] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:46,870][06026] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:21:47,237][06034] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,562][06022] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,562][06031] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,563][06025] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,564][06029] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,564][06051] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,565][06027] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,567][06052] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,567][06035] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,567][06026] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,754][06031] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,754][06027] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,773][06034] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,798][06023] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,812][06029] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,812][06025] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,850][06024] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,855][06028] Decorrelating experience for 0 frames... -[2024-07-05 12:21:47,944][06022] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,949][06035] Decorrelating experience for 32 frames... -[2024-07-05 12:21:47,993][06027] Decorrelating experience for 64 frames... -[2024-07-05 12:21:47,994][06033] Decorrelating experience for 0 frames... -[2024-07-05 12:21:48,009][06029] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,053][06031] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,070][06025] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,095][06024] Decorrelating experience for 32 frames... -[2024-07-05 12:21:48,140][06035] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,189][06023] Decorrelating experience for 32 frames... -[2024-07-05 12:21:48,193][06027] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,196][06022] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,196][06029] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,202][06030] Decorrelating experience for 0 frames... -[2024-07-05 12:21:48,274][06033] Decorrelating experience for 32 frames... -[2024-07-05 12:21:48,338][06035] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,381][06030] Decorrelating experience for 32 frames... -[2024-07-05 12:21:48,385][06023] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,400][06025] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,423][06034] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,548][06024] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,563][06033] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,603][06035] Decorrelating experience for 128 frames... -[2024-07-05 12:21:48,628][06031] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,640][06030] Decorrelating experience for 64 frames... -[2024-07-05 12:21:48,747][06028] Decorrelating experience for 32 frames... -[2024-07-05 12:21:48,785][06025] Decorrelating experience for 128 frames... -[2024-07-05 12:21:48,816][06022] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,830][06024] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,842][06035] Decorrelating experience for 160 frames... -[2024-07-05 12:21:48,935][06030] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,943][06034] Decorrelating experience for 96 frames... -[2024-07-05 12:21:48,958][06028] Decorrelating experience for 64 frames... -[2024-07-05 12:21:49,039][06033] Decorrelating experience for 96 frames... -[2024-07-05 12:21:49,043][06031] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,055][06027] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,122][06025] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,211][06029] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,237][06024] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,256][06022] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,261][06028] Decorrelating experience for 96 frames... -[2024-07-05 12:21:49,337][06033] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,350][06030] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,374][06031] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,406][06027] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,495][06022] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,500][06029] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,596][06035] Decorrelating experience for 192 frames... -[2024-07-05 12:21:49,602][06030] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,638][06024] Decorrelating experience for 160 frames... -[2024-07-05 12:21:49,655][06025] Decorrelating experience for 192 frames... -[2024-07-05 12:21:49,718][06034] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,776][06029] Decorrelating experience for 192 frames... -[2024-07-05 12:21:49,793][06022] Decorrelating experience for 192 frames... -[2024-07-05 12:21:49,838][06051] Decorrelating experience for 32 frames... -[2024-07-05 12:21:49,850][06030] Decorrelating experience for 192 frames... -[2024-07-05 12:21:49,853][06035] Decorrelating experience for 224 frames... -[2024-07-05 12:21:49,937][06028] Decorrelating experience for 128 frames... -[2024-07-05 12:21:49,967][05794] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 150421504. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 12:21:50,006][06024] Decorrelating experience for 192 frames... -[2024-07-05 12:21:50,035][06025] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,058][06032] Decorrelating experience for 0 frames... -[2024-07-05 12:21:50,118][06051] Decorrelating experience for 64 frames... -[2024-07-05 12:21:50,152][06029] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,157][06034] Decorrelating experience for 160 frames... -[2024-07-05 12:21:50,270][06022] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,279][06028] Decorrelating experience for 160 frames... -[2024-07-05 12:21:50,289][06031] Decorrelating experience for 192 frames... -[2024-07-05 12:21:50,376][06032] Decorrelating experience for 32 frames... -[2024-07-05 12:21:50,391][06051] Decorrelating experience for 96 frames... -[2024-07-05 12:21:50,412][06030] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,544][06028] Decorrelating experience for 192 frames... -[2024-07-05 12:21:50,563][06034] Decorrelating experience for 192 frames... -[2024-07-05 12:21:50,579][06033] Decorrelating experience for 160 frames... -[2024-07-05 12:21:50,678][06031] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,683][06024] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,690][06032] Decorrelating experience for 64 frames... -[2024-07-05 12:21:50,863][06023] Decorrelating experience for 96 frames... -[2024-07-05 12:21:50,926][06032] Decorrelating experience for 96 frames... -[2024-07-05 12:21:50,987][06034] Decorrelating experience for 224 frames... -[2024-07-05 12:21:50,991][06028] Decorrelating experience for 224 frames... -[2024-07-05 12:21:51,094][06026] Decorrelating experience for 32 frames... -[2024-07-05 12:21:51,254][06023] Decorrelating experience for 128 frames... -[2024-07-05 12:21:51,286][06033] Decorrelating experience for 192 frames... -[2024-07-05 12:21:51,383][06032] Decorrelating experience for 128 frames... -[2024-07-05 12:21:51,387][06052] Decorrelating experience for 32 frames... -[2024-07-05 12:21:51,419][06026] Decorrelating experience for 64 frames... -[2024-07-05 12:21:51,607][06001] Signal inference workers to stop experience collection... -[2024-07-05 12:21:51,613][06021] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 12:21:51,624][06023] Decorrelating experience for 160 frames... -[2024-07-05 12:21:51,660][06033] Decorrelating experience for 224 frames... -[2024-07-05 12:21:51,672][06052] Decorrelating experience for 64 frames... -[2024-07-05 12:21:51,676][06051] Decorrelating experience for 128 frames... -[2024-07-05 12:21:51,858][06023] Decorrelating experience for 192 frames... -[2024-07-05 12:21:51,874][06052] Decorrelating experience for 96 frames... -[2024-07-05 12:21:51,899][06051] Decorrelating experience for 160 frames... -[2024-07-05 12:21:51,914][06026] Decorrelating experience for 96 frames... -[2024-07-05 12:21:51,992][06027] Decorrelating experience for 192 frames... -[2024-07-05 12:21:52,145][06032] Decorrelating experience for 160 frames... -[2024-07-05 12:21:52,148][06023] Decorrelating experience for 224 frames... -[2024-07-05 12:21:52,186][06051] Decorrelating experience for 192 frames... -[2024-07-05 12:21:52,218][06052] Decorrelating experience for 128 frames... -[2024-07-05 12:21:52,303][06026] Decorrelating experience for 128 frames... -[2024-07-05 12:21:52,390][06032] Decorrelating experience for 192 frames... -[2024-07-05 12:21:52,428][06051] Decorrelating experience for 224 frames... -[2024-07-05 12:21:52,442][06052] Decorrelating experience for 160 frames... -[2024-07-05 12:21:52,445][06027] Decorrelating experience for 224 frames... -[2024-07-05 12:21:52,606][06026] Decorrelating experience for 160 frames... -[2024-07-05 12:21:52,667][06032] Decorrelating experience for 224 frames... -[2024-07-05 12:21:52,688][06052] Decorrelating experience for 192 frames... -[2024-07-05 12:21:52,832][06026] Decorrelating experience for 192 frames... -[2024-07-05 12:21:52,910][06052] Decorrelating experience for 224 frames... -[2024-07-05 12:21:53,064][06026] Decorrelating experience for 224 frames... -[2024-07-05 12:21:53,475][06001] Signal inference workers to resume experience collection... -[2024-07-05 12:21:53,475][06021] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 12:21:54,967][05794] Fps is (10 sec: 9830.6, 60 sec: 9830.6, 300 sec: 9830.6). Total num frames: 150470656. Throughput: 0: 1108.8. Samples: 5544. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:21:54,968][05794] Avg episode reward: [(0, '3.198')] -[2024-07-05 12:21:55,744][06021] Updated weights for policy 0, policy_version 20814 (0.0102) -[2024-07-05 12:21:57,954][06021] Updated weights for policy 0, policy_version 20824 (0.0008) -[2024-07-05 12:21:59,967][05794] Fps is (10 sec: 22938.1, 60 sec: 22938.1, 300 sec: 22938.1). Total num frames: 150650880. Throughput: 0: 5130.5. Samples: 51304. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:21:59,968][05794] Avg episode reward: [(0, '34.177')] -[2024-07-05 12:22:00,175][06021] Updated weights for policy 0, policy_version 20834 (0.0011) -[2024-07-05 12:22:00,494][05794] Heartbeat connected on Batcher_0 -[2024-07-05 12:22:00,498][05794] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 12:22:00,508][05794] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 12:22:00,510][05794] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 12:22:00,512][05794] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 12:22:00,520][05794] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 12:22:00,521][05794] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 12:22:00,524][05794] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 12:22:00,526][05794] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 12:22:00,526][05794] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 12:22:00,527][05794] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 12:22:00,540][05794] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 12:22:00,564][05794] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 12:22:00,569][05794] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 12:22:00,570][05794] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 12:22:00,571][05794] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 12:22:00,572][05794] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 12:22:00,577][05794] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 12:22:00,616][05794] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 12:22:02,405][06021] Updated weights for policy 0, policy_version 20844 (0.0012) -[2024-07-05 12:22:04,599][06021] Updated weights for policy 0, policy_version 20854 (0.0010) -[2024-07-05 12:22:04,967][05794] Fps is (10 sec: 36863.9, 60 sec: 27853.0, 300 sec: 27853.0). Total num frames: 150839296. Throughput: 0: 7140.6. Samples: 107108. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:22:04,968][05794] Avg episode reward: [(0, '50.068')] -[2024-07-05 12:22:06,794][06021] Updated weights for policy 0, policy_version 20864 (0.0010) -[2024-07-05 12:22:08,968][06021] Updated weights for policy 0, policy_version 20874 (0.0009) -[2024-07-05 12:22:09,967][05794] Fps is (10 sec: 37681.0, 60 sec: 30309.9, 300 sec: 30309.9). Total num frames: 151027712. Throughput: 0: 6739.3. Samples: 134788. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:22:09,968][05794] Avg episode reward: [(0, '51.952')] -[2024-07-05 12:22:11,132][06021] Updated weights for policy 0, policy_version 20884 (0.0009) -[2024-07-05 12:22:13,368][06021] Updated weights for policy 0, policy_version 20894 (0.0008) -[2024-07-05 12:22:14,967][05794] Fps is (10 sec: 37683.4, 60 sec: 31785.1, 300 sec: 31785.1). Total num frames: 151216128. Throughput: 0: 7629.6. Samples: 190740. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:22:14,968][05794] Avg episode reward: [(0, '48.319')] -[2024-07-05 12:22:15,542][06021] Updated weights for policy 0, policy_version 20904 (0.0014) -[2024-07-05 12:22:17,730][06021] Updated weights for policy 0, policy_version 20914 (0.0009) -[2024-07-05 12:22:19,908][06021] Updated weights for policy 0, policy_version 20924 (0.0009) -[2024-07-05 12:22:19,967][05794] Fps is (10 sec: 37684.1, 60 sec: 32767.9, 300 sec: 32767.9). Total num frames: 151404544. Throughput: 0: 8247.6. Samples: 247428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:22:19,968][05794] Avg episode reward: [(0, '45.791')] -[2024-07-05 12:22:22,145][06021] Updated weights for policy 0, policy_version 20934 (0.0012) -[2024-07-05 12:22:24,338][06021] Updated weights for policy 0, policy_version 20944 (0.0010) -[2024-07-05 12:22:24,967][05794] Fps is (10 sec: 36862.7, 60 sec: 33235.9, 300 sec: 33235.9). Total num frames: 151584768. Throughput: 0: 7860.0. Samples: 275100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:22:24,969][05794] Avg episode reward: [(0, '51.461')] -[2024-07-05 12:22:26,539][06021] Updated weights for policy 0, policy_version 20954 (0.0009) -[2024-07-05 12:22:28,688][06021] Updated weights for policy 0, policy_version 20964 (0.0009) -[2024-07-05 12:22:29,967][05794] Fps is (10 sec: 36864.6, 60 sec: 33792.1, 300 sec: 33792.1). Total num frames: 151773184. Throughput: 0: 8284.4. Samples: 331376. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:22:29,968][05794] Avg episode reward: [(0, '46.965')] -[2024-07-05 12:22:30,902][06021] Updated weights for policy 0, policy_version 20974 (0.0010) -[2024-07-05 12:22:33,158][06021] Updated weights for policy 0, policy_version 20984 (0.0009) -[2024-07-05 12:22:34,967][05794] Fps is (10 sec: 37684.4, 60 sec: 34224.4, 300 sec: 34224.4). Total num frames: 151961600. Throughput: 0: 8592.9. Samples: 386680. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:22:34,968][05794] Avg episode reward: [(0, '49.356')] -[2024-07-05 12:22:35,433][06021] Updated weights for policy 0, policy_version 20994 (0.0010) -[2024-07-05 12:22:37,669][06021] Updated weights for policy 0, policy_version 21004 (0.0009) -[2024-07-05 12:22:39,800][06021] Updated weights for policy 0, policy_version 21014 (0.0011) -[2024-07-05 12:22:39,967][05794] Fps is (10 sec: 36863.6, 60 sec: 34406.4, 300 sec: 34406.4). Total num frames: 152141824. Throughput: 0: 9069.7. Samples: 413680. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:22:39,968][05794] Avg episode reward: [(0, '49.055')] -[2024-07-05 12:22:42,077][06021] Updated weights for policy 0, policy_version 21024 (0.0009) -[2024-07-05 12:22:44,351][06021] Updated weights for policy 0, policy_version 21034 (0.0011) -[2024-07-05 12:22:44,967][05794] Fps is (10 sec: 36044.8, 60 sec: 34555.4, 300 sec: 34555.4). Total num frames: 152322048. Throughput: 0: 9281.5. Samples: 468972. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:22:44,968][05794] Avg episode reward: [(0, '50.450')] -[2024-07-05 12:22:46,554][06021] Updated weights for policy 0, policy_version 21044 (0.0009) -[2024-07-05 12:22:48,719][06021] Updated weights for policy 0, policy_version 21054 (0.0009) -[2024-07-05 12:22:49,967][05794] Fps is (10 sec: 36864.6, 60 sec: 34816.1, 300 sec: 34816.1). Total num frames: 152510464. Throughput: 0: 9282.2. Samples: 524808. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:22:49,969][05794] Avg episode reward: [(0, '51.617')] -[2024-07-05 12:22:50,996][06021] Updated weights for policy 0, policy_version 21064 (0.0013) -[2024-07-05 12:22:53,249][06021] Updated weights for policy 0, policy_version 21074 (0.0009) -[2024-07-05 12:22:54,971][05794] Fps is (10 sec: 36848.9, 60 sec: 36998.0, 300 sec: 34908.4). Total num frames: 152690688. Throughput: 0: 9263.5. Samples: 551680. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:22:54,972][05794] Avg episode reward: [(0, '51.894')] -[2024-07-05 12:22:55,440][06021] Updated weights for policy 0, policy_version 21084 (0.0013) -[2024-07-05 12:22:57,674][06021] Updated weights for policy 0, policy_version 21094 (0.0010) -[2024-07-05 12:22:59,888][06021] Updated weights for policy 0, policy_version 21104 (0.0010) -[2024-07-05 12:22:59,967][05794] Fps is (10 sec: 36863.5, 60 sec: 37136.9, 300 sec: 35108.6). Total num frames: 152879104. Throughput: 0: 9250.1. Samples: 606996. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:22:59,968][05794] Avg episode reward: [(0, '51.134')] -[2024-07-05 12:23:02,097][06021] Updated weights for policy 0, policy_version 21114 (0.0009) -[2024-07-05 12:23:04,209][06021] Updated weights for policy 0, policy_version 21124 (0.0011) -[2024-07-05 12:23:04,967][05794] Fps is (10 sec: 37698.3, 60 sec: 37137.0, 300 sec: 35280.2). Total num frames: 153067520. Throughput: 0: 9254.1. Samples: 663860. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:23:04,968][05794] Avg episode reward: [(0, '52.418')] -[2024-07-05 12:23:06,428][06021] Updated weights for policy 0, policy_version 21134 (0.0014) -[2024-07-05 12:23:08,670][06021] Updated weights for policy 0, policy_version 21144 (0.0010) -[2024-07-05 12:23:09,977][05794] Fps is (10 sec: 36825.1, 60 sec: 36994.2, 300 sec: 35323.3). Total num frames: 153247744. Throughput: 0: 9234.9. Samples: 690768. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:23:09,979][05794] Avg episode reward: [(0, '48.713')] -[2024-07-05 12:23:10,871][06021] Updated weights for policy 0, policy_version 21154 (0.0010) -[2024-07-05 12:23:13,072][06021] Updated weights for policy 0, policy_version 21164 (0.0014) -[2024-07-05 12:23:14,967][05794] Fps is (10 sec: 37683.8, 60 sec: 37137.1, 300 sec: 35563.0). Total num frames: 153444352. Throughput: 0: 9234.1. Samples: 746908. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:23:14,968][05794] Avg episode reward: [(0, '48.843')] -[2024-07-05 12:23:15,223][06021] Updated weights for policy 0, policy_version 21174 (0.0013) -[2024-07-05 12:23:17,408][06021] Updated weights for policy 0, policy_version 21184 (0.0010) -[2024-07-05 12:23:19,570][06021] Updated weights for policy 0, policy_version 21194 (0.0009) -[2024-07-05 12:23:19,967][05794] Fps is (10 sec: 37722.9, 60 sec: 37000.6, 300 sec: 35589.7). Total num frames: 153624576. Throughput: 0: 9261.8. Samples: 803464. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:23:19,969][05794] Avg episode reward: [(0, '47.863')] -[2024-07-05 12:23:21,747][06021] Updated weights for policy 0, policy_version 21204 (0.0010) -[2024-07-05 12:23:23,917][06021] Updated weights for policy 0, policy_version 21214 (0.0011) -[2024-07-05 12:23:24,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37137.3, 300 sec: 35699.9). Total num frames: 153812992. Throughput: 0: 9288.7. Samples: 831672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:23:24,968][05794] Avg episode reward: [(0, '49.723')] -[2024-07-05 12:23:26,099][06021] Updated weights for policy 0, policy_version 21224 (0.0011) -[2024-07-05 12:23:28,354][06021] Updated weights for policy 0, policy_version 21234 (0.0013) -[2024-07-05 12:23:29,967][05794] Fps is (10 sec: 37683.5, 60 sec: 37137.0, 300 sec: 35799.0). Total num frames: 154001408. Throughput: 0: 9302.4. Samples: 887580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:23:29,968][05794] Avg episode reward: [(0, '49.832')] -[2024-07-05 12:23:30,501][06021] Updated weights for policy 0, policy_version 21244 (0.0012) -[2024-07-05 12:23:32,637][06021] Updated weights for policy 0, policy_version 21254 (0.0014) -[2024-07-05 12:23:34,849][06021] Updated weights for policy 0, policy_version 21264 (0.0009) -[2024-07-05 12:23:34,967][05794] Fps is (10 sec: 37682.8, 60 sec: 37137.0, 300 sec: 35888.8). Total num frames: 154189824. Throughput: 0: 9320.7. Samples: 944240. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:23:34,968][05794] Avg episode reward: [(0, '47.122')] -[2024-07-05 12:23:35,043][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000021265_154198016.pth... -[2024-07-05 12:23:35,127][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020754_150011904.pth -[2024-07-05 12:23:37,075][06021] Updated weights for policy 0, policy_version 21274 (0.0011) -[2024-07-05 12:23:39,264][06021] Updated weights for policy 0, policy_version 21284 (0.0009) -[2024-07-05 12:23:39,967][05794] Fps is (10 sec: 37683.8, 60 sec: 37273.7, 300 sec: 35970.4). Total num frames: 154378240. Throughput: 0: 9345.8. Samples: 972204. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:23:39,968][05794] Avg episode reward: [(0, '49.968')] -[2024-07-05 12:23:41,449][06021] Updated weights for policy 0, policy_version 21294 (0.0014) -[2024-07-05 12:23:43,638][06021] Updated weights for policy 0, policy_version 21304 (0.0010) -[2024-07-05 12:23:44,967][05794] Fps is (10 sec: 37683.5, 60 sec: 37410.1, 300 sec: 36044.8). Total num frames: 154566656. Throughput: 0: 9360.1. Samples: 1028200. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:23:44,968][05794] Avg episode reward: [(0, '48.170')] -[2024-07-05 12:23:45,774][06021] Updated weights for policy 0, policy_version 21314 (0.0012) -[2024-07-05 12:23:48,003][06021] Updated weights for policy 0, policy_version 21324 (0.0009) -[2024-07-05 12:23:49,967][05794] Fps is (10 sec: 37683.1, 60 sec: 37410.1, 300 sec: 36113.1). Total num frames: 154755072. Throughput: 0: 9347.9. Samples: 1084516. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:23:49,968][05794] Avg episode reward: [(0, '49.588')] -[2024-07-05 12:23:50,162][06021] Updated weights for policy 0, policy_version 21334 (0.0009) -[2024-07-05 12:23:52,314][06021] Updated weights for policy 0, policy_version 21344 (0.0014) -[2024-07-05 12:23:54,518][06021] Updated weights for policy 0, policy_version 21354 (0.0009) -[2024-07-05 12:23:54,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37412.7, 300 sec: 36110.4). Total num frames: 154935296. Throughput: 0: 9381.2. Samples: 1112820. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:23:54,968][05794] Avg episode reward: [(0, '48.480')] -[2024-07-05 12:23:56,715][06021] Updated weights for policy 0, policy_version 21364 (0.0010) -[2024-07-05 12:23:58,879][06021] Updated weights for policy 0, policy_version 21374 (0.0009) -[2024-07-05 12:23:59,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37546.8, 300 sec: 36233.9). Total num frames: 155131904. Throughput: 0: 9394.7. Samples: 1169672. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:23:59,968][05794] Avg episode reward: [(0, '48.898')] -[2024-07-05 12:24:01,036][06021] Updated weights for policy 0, policy_version 21384 (0.0010) -[2024-07-05 12:24:03,251][06021] Updated weights for policy 0, policy_version 21394 (0.0009) -[2024-07-05 12:24:04,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37410.2, 300 sec: 36226.9). Total num frames: 155312128. Throughput: 0: 9382.0. Samples: 1225652. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:24:04,969][05794] Avg episode reward: [(0, '49.267')] -[2024-07-05 12:24:05,428][06021] Updated weights for policy 0, policy_version 21404 (0.0011) -[2024-07-05 12:24:07,593][06021] Updated weights for policy 0, policy_version 21414 (0.0014) -[2024-07-05 12:24:09,838][06021] Updated weights for policy 0, policy_version 21424 (0.0011) -[2024-07-05 12:24:09,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37553.4, 300 sec: 36278.9). Total num frames: 155500544. Throughput: 0: 9378.7. Samples: 1253712. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:24:09,968][05794] Avg episode reward: [(0, '51.731')] -[2024-07-05 12:24:12,004][06021] Updated weights for policy 0, policy_version 21434 (0.0010) -[2024-07-05 12:24:14,195][06021] Updated weights for policy 0, policy_version 21444 (0.0011) -[2024-07-05 12:24:14,967][05794] Fps is (10 sec: 37683.5, 60 sec: 37410.1, 300 sec: 36327.3). Total num frames: 155688960. Throughput: 0: 9383.5. Samples: 1309836. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:24:14,968][05794] Avg episode reward: [(0, '48.419')] -[2024-07-05 12:24:16,373][06021] Updated weights for policy 0, policy_version 21454 (0.0009) -[2024-07-05 12:24:18,594][06021] Updated weights for policy 0, policy_version 21464 (0.0012) -[2024-07-05 12:24:19,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37546.8, 300 sec: 36372.5). Total num frames: 155877376. Throughput: 0: 9371.3. Samples: 1365948. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:24:19,968][05794] Avg episode reward: [(0, '51.587')] -[2024-07-05 12:24:20,778][06021] Updated weights for policy 0, policy_version 21474 (0.0009) -[2024-07-05 12:24:22,980][06021] Updated weights for policy 0, policy_version 21484 (0.0011) -[2024-07-05 12:24:24,967][05794] Fps is (10 sec: 36863.3, 60 sec: 37410.0, 300 sec: 36361.9). Total num frames: 156057600. Throughput: 0: 9366.1. Samples: 1393680. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:24:24,968][05794] Avg episode reward: [(0, '50.196')] -[2024-07-05 12:24:25,200][06021] Updated weights for policy 0, policy_version 21494 (0.0009) -[2024-07-05 12:24:27,327][06021] Updated weights for policy 0, policy_version 21504 (0.0010) -[2024-07-05 12:24:29,544][06021] Updated weights for policy 0, policy_version 21514 (0.0010) -[2024-07-05 12:24:29,967][05794] Fps is (10 sec: 36863.0, 60 sec: 37410.0, 300 sec: 36403.2). Total num frames: 156246016. Throughput: 0: 9363.6. Samples: 1449564. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:24:29,968][05794] Avg episode reward: [(0, '51.172')] -[2024-07-05 12:24:31,726][06021] Updated weights for policy 0, policy_version 21524 (0.0010) -[2024-07-05 12:24:33,968][06021] Updated weights for policy 0, policy_version 21534 (0.0010) -[2024-07-05 12:24:34,968][05794] Fps is (10 sec: 37678.9, 60 sec: 37409.4, 300 sec: 36441.7). Total num frames: 156434432. Throughput: 0: 9363.8. Samples: 1505900. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:24:34,970][05794] Avg episode reward: [(0, '52.310')] -[2024-07-05 12:24:36,183][06021] Updated weights for policy 0, policy_version 21544 (0.0012) -[2024-07-05 12:24:38,353][06021] Updated weights for policy 0, policy_version 21554 (0.0014) -[2024-07-05 12:24:39,967][05794] Fps is (10 sec: 37683.8, 60 sec: 37410.0, 300 sec: 36478.5). Total num frames: 156622848. Throughput: 0: 9345.0. Samples: 1533344. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:24:39,968][05794] Avg episode reward: [(0, '49.028')] -[2024-07-05 12:24:40,572][06021] Updated weights for policy 0, policy_version 21564 (0.0009) -[2024-07-05 12:24:42,865][06021] Updated weights for policy 0, policy_version 21574 (0.0013) -[2024-07-05 12:24:44,967][05794] Fps is (10 sec: 36869.1, 60 sec: 37273.6, 300 sec: 36466.1). Total num frames: 156803072. Throughput: 0: 9289.7. Samples: 1587708. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:24:44,968][05794] Avg episode reward: [(0, '48.222')] -[2024-07-05 12:24:45,115][06021] Updated weights for policy 0, policy_version 21584 (0.0013) -[2024-07-05 12:24:47,440][06021] Updated weights for policy 0, policy_version 21594 (0.0009) -[2024-07-05 12:24:49,688][06021] Updated weights for policy 0, policy_version 21604 (0.0010) -[2024-07-05 12:24:49,967][05794] Fps is (10 sec: 36045.1, 60 sec: 37137.0, 300 sec: 36454.4). Total num frames: 156983296. Throughput: 0: 9259.6. Samples: 1642332. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:24:49,968][05794] Avg episode reward: [(0, '51.090')] -[2024-07-05 12:24:51,869][06021] Updated weights for policy 0, policy_version 21614 (0.0013) -[2024-07-05 12:24:54,069][06021] Updated weights for policy 0, policy_version 21624 (0.0010) -[2024-07-05 12:24:54,967][05794] Fps is (10 sec: 36043.1, 60 sec: 37136.8, 300 sec: 36443.3). Total num frames: 157163520. Throughput: 0: 9256.4. Samples: 1670256. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:24:54,968][05794] Avg episode reward: [(0, '51.194')] -[2024-07-05 12:24:56,270][06021] Updated weights for policy 0, policy_version 21634 (0.0013) -[2024-07-05 12:24:58,471][06021] Updated weights for policy 0, policy_version 21644 (0.0010) -[2024-07-05 12:24:59,967][05794] Fps is (10 sec: 36862.5, 60 sec: 37000.3, 300 sec: 36475.9). Total num frames: 157351936. Throughput: 0: 9253.6. Samples: 1726252. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:24:59,969][05794] Avg episode reward: [(0, '50.047')] -[2024-07-05 12:25:00,671][06021] Updated weights for policy 0, policy_version 21654 (0.0011) -[2024-07-05 12:25:02,803][06021] Updated weights for policy 0, policy_version 21664 (0.0009) -[2024-07-05 12:25:04,967][05794] Fps is (10 sec: 37684.1, 60 sec: 37137.0, 300 sec: 36506.9). Total num frames: 157540352. Throughput: 0: 9249.9. Samples: 1782196. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:25:04,974][05794] Avg episode reward: [(0, '50.927')] -[2024-07-05 12:25:05,030][06021] Updated weights for policy 0, policy_version 21674 (0.0010) -[2024-07-05 12:25:07,212][06021] Updated weights for policy 0, policy_version 21684 (0.0012) -[2024-07-05 12:25:09,394][06021] Updated weights for policy 0, policy_version 21694 (0.0013) -[2024-07-05 12:25:09,967][05794] Fps is (10 sec: 37684.8, 60 sec: 37137.0, 300 sec: 36536.3). Total num frames: 157728768. Throughput: 0: 9259.4. Samples: 1810352. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:25:09,968][05794] Avg episode reward: [(0, '51.367')] -[2024-07-05 12:25:11,602][06021] Updated weights for policy 0, policy_version 21704 (0.0009) -[2024-07-05 12:25:13,839][06021] Updated weights for policy 0, policy_version 21714 (0.0009) -[2024-07-05 12:25:14,967][05794] Fps is (10 sec: 37683.9, 60 sec: 37137.1, 300 sec: 36564.3). Total num frames: 157917184. Throughput: 0: 9264.9. Samples: 1866484. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:25:14,968][05794] Avg episode reward: [(0, '48.860')] -[2024-07-05 12:25:15,986][06021] Updated weights for policy 0, policy_version 21724 (0.0009) -[2024-07-05 12:25:18,142][06021] Updated weights for policy 0, policy_version 21734 (0.0014) -[2024-07-05 12:25:19,967][05794] Fps is (10 sec: 37683.3, 60 sec: 37137.1, 300 sec: 36591.0). Total num frames: 158105600. Throughput: 0: 9259.6. Samples: 1922568. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:25:19,968][05794] Avg episode reward: [(0, '50.533')] -[2024-07-05 12:25:20,319][06021] Updated weights for policy 0, policy_version 21744 (0.0008) -[2024-07-05 12:25:22,528][06021] Updated weights for policy 0, policy_version 21754 (0.0011) -[2024-07-05 12:25:24,751][06021] Updated weights for policy 0, policy_version 21764 (0.0012) -[2024-07-05 12:25:24,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37273.7, 300 sec: 36616.4). Total num frames: 158294016. Throughput: 0: 9269.2. Samples: 1950456. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:25:24,968][05794] Avg episode reward: [(0, '52.681')] -[2024-07-05 12:25:26,916][06021] Updated weights for policy 0, policy_version 21774 (0.0009) -[2024-07-05 12:25:29,151][06021] Updated weights for policy 0, policy_version 21784 (0.0012) -[2024-07-05 12:25:29,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37137.3, 300 sec: 36603.4). Total num frames: 158474240. Throughput: 0: 9312.5. Samples: 2006772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:25:29,967][05794] Avg episode reward: [(0, '48.812')] -[2024-07-05 12:25:31,286][06021] Updated weights for policy 0, policy_version 21794 (0.0010) -[2024-07-05 12:25:33,469][06021] Updated weights for policy 0, policy_version 21804 (0.0012) -[2024-07-05 12:25:34,967][05794] Fps is (10 sec: 36863.8, 60 sec: 37137.9, 300 sec: 36627.4). Total num frames: 158662656. Throughput: 0: 9332.7. Samples: 2062304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:25:34,968][05794] Avg episode reward: [(0, '48.414')] -[2024-07-05 12:25:34,974][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000021810_158662656.pth... -[2024-07-05 12:25:35,074][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000020804_150421504.pth -[2024-07-05 12:25:35,695][06021] Updated weights for policy 0, policy_version 21814 (0.0012) -[2024-07-05 12:25:37,914][06021] Updated weights for policy 0, policy_version 21824 (0.0010) -[2024-07-05 12:25:39,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37137.2, 300 sec: 36650.3). Total num frames: 158851072. Throughput: 0: 9346.1. Samples: 2090828. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:25:39,968][05794] Avg episode reward: [(0, '53.143')] -[2024-07-05 12:25:40,112][06021] Updated weights for policy 0, policy_version 21834 (0.0013) -[2024-07-05 12:25:42,314][06021] Updated weights for policy 0, policy_version 21844 (0.0012) -[2024-07-05 12:25:44,469][06021] Updated weights for policy 0, policy_version 21854 (0.0009) -[2024-07-05 12:25:44,967][05794] Fps is (10 sec: 37683.3, 60 sec: 37273.6, 300 sec: 36672.3). Total num frames: 159039488. Throughput: 0: 9341.4. Samples: 2146612. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:25:44,968][05794] Avg episode reward: [(0, '49.859')] -[2024-07-05 12:25:46,676][06021] Updated weights for policy 0, policy_version 21864 (0.0016) -[2024-07-05 12:25:48,883][06021] Updated weights for policy 0, policy_version 21874 (0.0009) -[2024-07-05 12:25:49,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37410.1, 300 sec: 36693.4). Total num frames: 159227904. Throughput: 0: 9343.1. Samples: 2202636. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:25:49,968][05794] Avg episode reward: [(0, '47.743')] -[2024-07-05 12:25:51,044][06021] Updated weights for policy 0, policy_version 21884 (0.0010) -[2024-07-05 12:25:53,228][06021] Updated weights for policy 0, policy_version 21894 (0.0010) -[2024-07-05 12:25:54,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37546.9, 300 sec: 36713.6). Total num frames: 159416320. Throughput: 0: 9342.4. Samples: 2230760. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:25:54,968][05794] Avg episode reward: [(0, '51.805')] -[2024-07-05 12:25:55,367][06021] Updated weights for policy 0, policy_version 21904 (0.0010) -[2024-07-05 12:25:57,556][06021] Updated weights for policy 0, policy_version 21914 (0.0012) -[2024-07-05 12:25:59,724][06021] Updated weights for policy 0, policy_version 21924 (0.0009) -[2024-07-05 12:25:59,972][05794] Fps is (10 sec: 36843.3, 60 sec: 37406.9, 300 sec: 36699.4). Total num frames: 159596544. Throughput: 0: 9347.9. Samples: 2287192. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:25:59,974][05794] Avg episode reward: [(0, '49.004')] -[2024-07-05 12:26:01,968][06021] Updated weights for policy 0, policy_version 21934 (0.0009) -[2024-07-05 12:26:04,117][06021] Updated weights for policy 0, policy_version 21944 (0.0009) -[2024-07-05 12:26:04,990][05794] Fps is (10 sec: 36778.0, 60 sec: 37395.7, 300 sec: 36716.1). Total num frames: 159784960. Throughput: 0: 9342.8. Samples: 2343212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:26:04,995][05794] Avg episode reward: [(0, '51.053')] -[2024-07-05 12:26:06,361][06021] Updated weights for policy 0, policy_version 21954 (0.0010) -[2024-07-05 12:26:08,552][06021] Updated weights for policy 0, policy_version 21964 (0.0016) -[2024-07-05 12:26:09,967][05794] Fps is (10 sec: 37704.2, 60 sec: 37410.1, 300 sec: 36738.0). Total num frames: 159973376. Throughput: 0: 9355.7. Samples: 2371464. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:26:09,968][05794] Avg episode reward: [(0, '49.749')] -[2024-07-05 12:26:10,748][06021] Updated weights for policy 0, policy_version 21974 (0.0009) -[2024-07-05 12:26:12,907][06021] Updated weights for policy 0, policy_version 21984 (0.0010) -[2024-07-05 12:26:14,967][05794] Fps is (10 sec: 37770.9, 60 sec: 37410.0, 300 sec: 36755.8). Total num frames: 160161792. Throughput: 0: 9343.4. Samples: 2427228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:26:14,968][05794] Avg episode reward: [(0, '46.896')] -[2024-07-05 12:26:15,059][06021] Updated weights for policy 0, policy_version 21994 (0.0013) -[2024-07-05 12:26:17,289][06021] Updated weights for policy 0, policy_version 22004 (0.0009) -[2024-07-05 12:26:19,487][06021] Updated weights for policy 0, policy_version 22014 (0.0012) -[2024-07-05 12:26:19,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37410.1, 300 sec: 36773.0). Total num frames: 160350208. Throughput: 0: 9360.2. Samples: 2483512. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 12:26:19,968][05794] Avg episode reward: [(0, '47.899')] -[2024-07-05 12:26:21,676][06021] Updated weights for policy 0, policy_version 22024 (0.0009) -[2024-07-05 12:26:23,854][06021] Updated weights for policy 0, policy_version 22034 (0.0010) -[2024-07-05 12:26:24,975][05794] Fps is (10 sec: 36837.9, 60 sec: 37269.1, 300 sec: 36758.8). Total num frames: 160530432. Throughput: 0: 9347.1. Samples: 2511516. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 12:26:24,977][05794] Avg episode reward: [(0, '49.845')] -[2024-07-05 12:26:26,058][06021] Updated weights for policy 0, policy_version 22044 (0.0010) -[2024-07-05 12:26:28,239][06021] Updated weights for policy 0, policy_version 22054 (0.0009) -[2024-07-05 12:26:29,967][05794] Fps is (10 sec: 37683.3, 60 sec: 37546.6, 300 sec: 36805.5). Total num frames: 160727040. Throughput: 0: 9363.2. Samples: 2567956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 12:26:29,968][05794] Avg episode reward: [(0, '51.189')] -[2024-07-05 12:26:30,405][06021] Updated weights for policy 0, policy_version 22064 (0.0009) -[2024-07-05 12:26:32,634][06021] Updated weights for policy 0, policy_version 22074 (0.0010) -[2024-07-05 12:26:34,842][06021] Updated weights for policy 0, policy_version 22084 (0.0013) -[2024-07-05 12:26:34,975][05794] Fps is (10 sec: 37686.6, 60 sec: 37406.2, 300 sec: 36791.3). Total num frames: 160907264. Throughput: 0: 9370.9. Samples: 2624384. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:26:34,981][05794] Avg episode reward: [(0, '49.467')] -[2024-07-05 12:26:37,023][06021] Updated weights for policy 0, policy_version 22094 (0.0011) -[2024-07-05 12:26:39,162][06021] Updated weights for policy 0, policy_version 22104 (0.0010) -[2024-07-05 12:26:39,967][05794] Fps is (10 sec: 36862.4, 60 sec: 37409.8, 300 sec: 36807.5). Total num frames: 161095680. Throughput: 0: 9370.6. Samples: 2652440. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:26:39,969][05794] Avg episode reward: [(0, '48.206')] -[2024-07-05 12:26:41,312][06021] Updated weights for policy 0, policy_version 22114 (0.0016) -[2024-07-05 12:26:43,500][06021] Updated weights for policy 0, policy_version 22124 (0.0010) -[2024-07-05 12:26:44,967][05794] Fps is (10 sec: 37706.8, 60 sec: 37410.1, 300 sec: 36822.4). Total num frames: 161284096. Throughput: 0: 9370.5. Samples: 2708812. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:26:44,968][05794] Avg episode reward: [(0, '48.762')] -[2024-07-05 12:26:45,685][06021] Updated weights for policy 0, policy_version 22134 (0.0010) -[2024-07-05 12:26:47,890][06021] Updated weights for policy 0, policy_version 22144 (0.0010) -[2024-07-05 12:26:49,967][05794] Fps is (10 sec: 37685.2, 60 sec: 37410.1, 300 sec: 37294.4). Total num frames: 161472512. Throughput: 0: 9376.5. Samples: 2764936. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:26:49,968][05794] Avg episode reward: [(0, '49.751')] -[2024-07-05 12:26:50,078][06021] Updated weights for policy 0, policy_version 22154 (0.0009) -[2024-07-05 12:26:52,273][06021] Updated weights for policy 0, policy_version 22164 (0.0010) -[2024-07-05 12:26:54,451][06021] Updated weights for policy 0, policy_version 22174 (0.0010) -[2024-07-05 12:26:54,967][05794] Fps is (10 sec: 37682.7, 60 sec: 37410.0, 300 sec: 37322.2). Total num frames: 161660928. Throughput: 0: 9366.1. Samples: 2792940. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:26:54,968][05794] Avg episode reward: [(0, '48.589')] -[2024-07-05 12:26:56,667][06021] Updated weights for policy 0, policy_version 22184 (0.0010) -[2024-07-05 12:26:58,825][06021] Updated weights for policy 0, policy_version 22194 (0.0009) -[2024-07-05 12:26:59,967][05794] Fps is (10 sec: 37682.6, 60 sec: 37550.1, 300 sec: 37322.2). Total num frames: 161849344. Throughput: 0: 9368.0. Samples: 2848788. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:26:59,968][05794] Avg episode reward: [(0, '51.814')] -[2024-07-05 12:27:00,997][06021] Updated weights for policy 0, policy_version 22204 (0.0009) -[2024-07-05 12:27:03,205][06021] Updated weights for policy 0, policy_version 22214 (0.0009) -[2024-07-05 12:27:04,967][05794] Fps is (10 sec: 36863.7, 60 sec: 37424.5, 300 sec: 37294.4). Total num frames: 162029568. Throughput: 0: 9371.3. Samples: 2905224. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:04,968][05794] Avg episode reward: [(0, '51.695')] -[2024-07-05 12:27:05,408][06021] Updated weights for policy 0, policy_version 22224 (0.0013) -[2024-07-05 12:27:07,619][06021] Updated weights for policy 0, policy_version 22234 (0.0011) -[2024-07-05 12:27:09,816][06021] Updated weights for policy 0, policy_version 22244 (0.0009) -[2024-07-05 12:27:09,967][05794] Fps is (10 sec: 36864.3, 60 sec: 37410.1, 300 sec: 37294.4). Total num frames: 162217984. Throughput: 0: 9370.6. Samples: 2933128. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:09,968][05794] Avg episode reward: [(0, '53.636')] -[2024-07-05 12:27:10,045][06001] Saving new best policy, reward=53.636! -[2024-07-05 12:27:12,039][06021] Updated weights for policy 0, policy_version 22254 (0.0014) -[2024-07-05 12:27:14,262][06021] Updated weights for policy 0, policy_version 22264 (0.0010) -[2024-07-05 12:27:14,967][05794] Fps is (10 sec: 37684.3, 60 sec: 37410.2, 300 sec: 37294.5). Total num frames: 162406400. Throughput: 0: 9341.5. Samples: 2988324. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:14,968][05794] Avg episode reward: [(0, '51.423')] -[2024-07-05 12:27:16,502][06021] Updated weights for policy 0, policy_version 22274 (0.0009) -[2024-07-05 12:27:18,615][06021] Updated weights for policy 0, policy_version 22284 (0.0009) -[2024-07-05 12:27:19,967][05794] Fps is (10 sec: 37683.7, 60 sec: 37410.2, 300 sec: 37322.2). Total num frames: 162594816. Throughput: 0: 9332.3. Samples: 3044280. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:19,967][05794] Avg episode reward: [(0, '47.782')] -[2024-07-05 12:27:20,829][06021] Updated weights for policy 0, policy_version 22294 (0.0012) -[2024-07-05 12:27:23,047][06021] Updated weights for policy 0, policy_version 22304 (0.0011) -[2024-07-05 12:27:24,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37414.7, 300 sec: 37294.4). Total num frames: 162775040. Throughput: 0: 9334.4. Samples: 3072484. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:24,968][05794] Avg episode reward: [(0, '47.968')] -[2024-07-05 12:27:25,248][06021] Updated weights for policy 0, policy_version 22314 (0.0011) -[2024-07-05 12:27:27,459][06021] Updated weights for policy 0, policy_version 22324 (0.0011) -[2024-07-05 12:27:29,616][06021] Updated weights for policy 0, policy_version 22334 (0.0010) -[2024-07-05 12:27:29,967][05794] Fps is (10 sec: 36863.5, 60 sec: 37273.6, 300 sec: 37294.4). Total num frames: 162963456. Throughput: 0: 9318.5. Samples: 3128144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:29,968][05794] Avg episode reward: [(0, '48.965')] -[2024-07-05 12:27:31,852][06021] Updated weights for policy 0, policy_version 22344 (0.0009) -[2024-07-05 12:27:33,998][06021] Updated weights for policy 0, policy_version 22354 (0.0011) -[2024-07-05 12:27:34,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37414.1, 300 sec: 37322.2). Total num frames: 163151872. Throughput: 0: 9324.0. Samples: 3184516. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:34,968][05794] Avg episode reward: [(0, '51.104')] -[2024-07-05 12:27:34,976][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000022358_163151872.pth... -[2024-07-05 12:27:35,068][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000021265_154198016.pth -[2024-07-05 12:27:36,174][06021] Updated weights for policy 0, policy_version 22364 (0.0016) -[2024-07-05 12:27:38,416][06021] Updated weights for policy 0, policy_version 22374 (0.0013) -[2024-07-05 12:27:39,967][05794] Fps is (10 sec: 36862.5, 60 sec: 37273.6, 300 sec: 37322.1). Total num frames: 163332096. Throughput: 0: 9321.5. Samples: 3212412. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:39,971][05794] Avg episode reward: [(0, '49.093')] -[2024-07-05 12:27:40,631][06021] Updated weights for policy 0, policy_version 22384 (0.0009) -[2024-07-05 12:27:42,812][06021] Updated weights for policy 0, policy_version 22394 (0.0010) -[2024-07-05 12:27:44,967][05794] Fps is (10 sec: 36864.0, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 163520512. Throughput: 0: 9322.0. Samples: 3268276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:44,968][05794] Avg episode reward: [(0, '49.555')] -[2024-07-05 12:27:45,055][06021] Updated weights for policy 0, policy_version 22404 (0.0009) -[2024-07-05 12:27:47,252][06021] Updated weights for policy 0, policy_version 22414 (0.0009) -[2024-07-05 12:27:49,418][06021] Updated weights for policy 0, policy_version 22424 (0.0012) -[2024-07-05 12:27:49,967][05794] Fps is (10 sec: 37684.8, 60 sec: 37273.6, 300 sec: 37350.5). Total num frames: 163708928. Throughput: 0: 9311.5. Samples: 3324240. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:49,968][05794] Avg episode reward: [(0, '49.458')] -[2024-07-05 12:27:51,606][06021] Updated weights for policy 0, policy_version 22434 (0.0009) -[2024-07-05 12:27:53,748][06021] Updated weights for policy 0, policy_version 22444 (0.0009) -[2024-07-05 12:27:54,967][05794] Fps is (10 sec: 37683.5, 60 sec: 37273.8, 300 sec: 37350.0). Total num frames: 163897344. Throughput: 0: 9313.3. Samples: 3352224. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:54,968][05794] Avg episode reward: [(0, '49.662')] -[2024-07-05 12:27:55,930][06021] Updated weights for policy 0, policy_version 22454 (0.0010) -[2024-07-05 12:27:58,147][06021] Updated weights for policy 0, policy_version 22464 (0.0011) -[2024-07-05 12:27:59,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 164085760. Throughput: 0: 9332.1. Samples: 3408268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:27:59,968][05794] Avg episode reward: [(0, '49.548')] -[2024-07-05 12:28:00,330][06021] Updated weights for policy 0, policy_version 22474 (0.0009) -[2024-07-05 12:28:02,594][06021] Updated weights for policy 0, policy_version 22484 (0.0013) -[2024-07-05 12:28:04,775][06021] Updated weights for policy 0, policy_version 22494 (0.0013) -[2024-07-05 12:28:04,967][05794] Fps is (10 sec: 37682.8, 60 sec: 37410.3, 300 sec: 37379.1). Total num frames: 164274176. Throughput: 0: 9335.9. Samples: 3464396. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:28:04,968][05794] Avg episode reward: [(0, '51.055')] -[2024-07-05 12:28:06,929][06021] Updated weights for policy 0, policy_version 22504 (0.0009) -[2024-07-05 12:28:09,135][06021] Updated weights for policy 0, policy_version 22514 (0.0010) -[2024-07-05 12:28:09,967][05794] Fps is (10 sec: 36864.7, 60 sec: 37273.7, 300 sec: 37322.2). Total num frames: 164454400. Throughput: 0: 9339.0. Samples: 3492740. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:28:09,967][05794] Avg episode reward: [(0, '48.863')] -[2024-07-05 12:28:11,325][06021] Updated weights for policy 0, policy_version 22524 (0.0010) -[2024-07-05 12:28:13,504][06021] Updated weights for policy 0, policy_version 22534 (0.0009) -[2024-07-05 12:28:14,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 164642816. Throughput: 0: 9340.7. Samples: 3548476. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:28:14,968][05794] Avg episode reward: [(0, '48.245')] -[2024-07-05 12:28:15,698][06021] Updated weights for policy 0, policy_version 22544 (0.0011) -[2024-07-05 12:28:17,879][06021] Updated weights for policy 0, policy_version 22554 (0.0011) -[2024-07-05 12:28:19,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 164831232. Throughput: 0: 9333.6. Samples: 3604528. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:28:19,967][05794] Avg episode reward: [(0, '48.005')] -[2024-07-05 12:28:20,118][06021] Updated weights for policy 0, policy_version 22564 (0.0009) -[2024-07-05 12:28:22,347][06021] Updated weights for policy 0, policy_version 22574 (0.0010) -[2024-07-05 12:28:24,510][06021] Updated weights for policy 0, policy_version 22584 (0.0009) -[2024-07-05 12:28:24,967][05794] Fps is (10 sec: 37683.3, 60 sec: 37410.1, 300 sec: 37350.0). Total num frames: 165019648. Throughput: 0: 9332.3. Samples: 3632360. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:28:24,968][05794] Avg episode reward: [(0, '50.257')] -[2024-07-05 12:28:26,734][06021] Updated weights for policy 0, policy_version 22594 (0.0009) -[2024-07-05 12:28:28,908][06021] Updated weights for policy 0, policy_version 22604 (0.0012) -[2024-07-05 12:28:29,967][05794] Fps is (10 sec: 36863.8, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 165199872. Throughput: 0: 9334.8. Samples: 3688340. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:28:29,968][05794] Avg episode reward: [(0, '49.315')] -[2024-07-05 12:28:31,078][06021] Updated weights for policy 0, policy_version 22614 (0.0009) -[2024-07-05 12:28:33,240][06021] Updated weights for policy 0, policy_version 22624 (0.0009) -[2024-07-05 12:28:34,976][05794] Fps is (10 sec: 36828.8, 60 sec: 37267.7, 300 sec: 37321.0). Total num frames: 165388288. Throughput: 0: 9333.6. Samples: 3744340. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:28:34,990][05794] Avg episode reward: [(0, '48.682')] -[2024-07-05 12:28:35,462][06021] Updated weights for policy 0, policy_version 22634 (0.0009) -[2024-07-05 12:28:37,656][06021] Updated weights for policy 0, policy_version 22644 (0.0010) -[2024-07-05 12:28:39,908][06021] Updated weights for policy 0, policy_version 22654 (0.0009) -[2024-07-05 12:28:39,967][05794] Fps is (10 sec: 37683.5, 60 sec: 37410.5, 300 sec: 37322.2). Total num frames: 165576704. Throughput: 0: 9336.2. Samples: 3772352. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:28:39,968][05794] Avg episode reward: [(0, '51.468')] -[2024-07-05 12:28:42,124][06021] Updated weights for policy 0, policy_version 22664 (0.0010) -[2024-07-05 12:28:44,285][06021] Updated weights for policy 0, policy_version 22674 (0.0010) -[2024-07-05 12:28:44,967][05794] Fps is (10 sec: 37719.1, 60 sec: 37410.2, 300 sec: 37322.2). Total num frames: 165765120. Throughput: 0: 9332.7. Samples: 3828240. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:28:44,968][05794] Avg episode reward: [(0, '50.569')] -[2024-07-05 12:28:46,479][06021] Updated weights for policy 0, policy_version 22684 (0.0010) -[2024-07-05 12:28:48,616][06021] Updated weights for policy 0, policy_version 22694 (0.0008) -[2024-07-05 12:28:49,967][05794] Fps is (10 sec: 37683.1, 60 sec: 37410.2, 300 sec: 37350.0). Total num frames: 165953536. Throughput: 0: 9323.8. Samples: 3883964. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:28:49,968][05794] Avg episode reward: [(0, '50.441')] -[2024-07-05 12:28:50,808][06021] Updated weights for policy 0, policy_version 22704 (0.0009) -[2024-07-05 12:28:53,040][06021] Updated weights for policy 0, policy_version 22714 (0.0016) -[2024-07-05 12:28:54,973][05794] Fps is (10 sec: 36839.4, 60 sec: 37269.4, 300 sec: 37293.6). Total num frames: 166133760. Throughput: 0: 9318.2. Samples: 3912120. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:28:54,975][05794] Avg episode reward: [(0, '47.439')] -[2024-07-05 12:28:55,263][06021] Updated weights for policy 0, policy_version 22724 (0.0011) -[2024-07-05 12:28:57,531][06021] Updated weights for policy 0, policy_version 22734 (0.0010) -[2024-07-05 12:28:59,705][06021] Updated weights for policy 0, policy_version 22744 (0.0009) -[2024-07-05 12:28:59,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37273.7, 300 sec: 37322.2). Total num frames: 166322176. Throughput: 0: 9322.8. Samples: 3968000. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:28:59,968][05794] Avg episode reward: [(0, '46.838')] -[2024-07-05 12:29:01,929][06021] Updated weights for policy 0, policy_version 22754 (0.0010) -[2024-07-05 12:29:04,058][06021] Updated weights for policy 0, policy_version 22764 (0.0010) -[2024-07-05 12:29:04,967][05794] Fps is (10 sec: 37708.4, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 166510592. Throughput: 0: 9307.0. Samples: 4023344. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:04,968][05794] Avg episode reward: [(0, '49.875')] -[2024-07-05 12:29:06,225][06021] Updated weights for policy 0, policy_version 22774 (0.0009) -[2024-07-05 12:29:08,433][06021] Updated weights for policy 0, policy_version 22784 (0.0010) -[2024-07-05 12:29:09,975][05794] Fps is (10 sec: 36840.6, 60 sec: 37269.6, 300 sec: 37293.6). Total num frames: 166690816. Throughput: 0: 9320.6. Samples: 4051844. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:09,977][05794] Avg episode reward: [(0, '50.029')] -[2024-07-05 12:29:10,623][06021] Updated weights for policy 0, policy_version 22794 (0.0009) -[2024-07-05 12:29:12,811][06021] Updated weights for policy 0, policy_version 22804 (0.0009) -[2024-07-05 12:29:14,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37273.6, 300 sec: 37294.4). Total num frames: 166879232. Throughput: 0: 9320.3. Samples: 4107752. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:14,968][05794] Avg episode reward: [(0, '51.344')] -[2024-07-05 12:29:15,041][06021] Updated weights for policy 0, policy_version 22814 (0.0008) -[2024-07-05 12:29:17,245][06021] Updated weights for policy 0, policy_version 22824 (0.0009) -[2024-07-05 12:29:19,452][06021] Updated weights for policy 0, policy_version 22834 (0.0011) -[2024-07-05 12:29:19,973][05794] Fps is (10 sec: 37687.9, 60 sec: 37270.4, 300 sec: 37321.6). Total num frames: 167067648. Throughput: 0: 9331.3. Samples: 4164208. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:29:19,986][05794] Avg episode reward: [(0, '49.848')] -[2024-07-05 12:29:21,643][06021] Updated weights for policy 0, policy_version 22844 (0.0010) -[2024-07-05 12:29:23,791][06021] Updated weights for policy 0, policy_version 22854 (0.0015) -[2024-07-05 12:29:24,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 167256064. Throughput: 0: 9330.0. Samples: 4192204. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:29:24,968][05794] Avg episode reward: [(0, '50.023')] -[2024-07-05 12:29:25,951][06021] Updated weights for policy 0, policy_version 22864 (0.0013) -[2024-07-05 12:29:28,161][06021] Updated weights for policy 0, policy_version 22874 (0.0008) -[2024-07-05 12:29:29,967][05794] Fps is (10 sec: 37702.6, 60 sec: 37410.2, 300 sec: 37322.4). Total num frames: 167444480. Throughput: 0: 9333.0. Samples: 4248224. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:29:29,968][05794] Avg episode reward: [(0, '49.345')] -[2024-07-05 12:29:30,361][06021] Updated weights for policy 0, policy_version 22884 (0.0009) -[2024-07-05 12:29:32,556][06021] Updated weights for policy 0, policy_version 22894 (0.0009) -[2024-07-05 12:29:34,728][06021] Updated weights for policy 0, policy_version 22904 (0.0015) -[2024-07-05 12:29:34,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37416.1, 300 sec: 37322.2). Total num frames: 167632896. Throughput: 0: 9339.0. Samples: 4304220. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:34,968][05794] Avg episode reward: [(0, '47.379')] -[2024-07-05 12:29:34,972][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000022905_167632896.pth... -[2024-07-05 12:29:35,054][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000021810_158662656.pth -[2024-07-05 12:29:36,917][06021] Updated weights for policy 0, policy_version 22914 (0.0010) -[2024-07-05 12:29:39,096][06021] Updated weights for policy 0, policy_version 22924 (0.0009) -[2024-07-05 12:29:39,967][05794] Fps is (10 sec: 36863.6, 60 sec: 37273.5, 300 sec: 37322.2). Total num frames: 167813120. Throughput: 0: 9326.3. Samples: 4331740. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:39,979][05794] Avg episode reward: [(0, '47.907')] -[2024-07-05 12:29:41,317][06021] Updated weights for policy 0, policy_version 22934 (0.0010) -[2024-07-05 12:29:43,512][06021] Updated weights for policy 0, policy_version 22944 (0.0009) -[2024-07-05 12:29:44,967][05794] Fps is (10 sec: 36864.0, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 168001536. Throughput: 0: 9341.9. Samples: 4388384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:44,968][05794] Avg episode reward: [(0, '49.723')] -[2024-07-05 12:29:45,697][06021] Updated weights for policy 0, policy_version 22954 (0.0012) -[2024-07-05 12:29:47,860][06021] Updated weights for policy 0, policy_version 22964 (0.0011) -[2024-07-05 12:29:49,967][05794] Fps is (10 sec: 37683.4, 60 sec: 37273.6, 300 sec: 37377.8). Total num frames: 168189952. Throughput: 0: 9350.7. Samples: 4444124. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:29:49,968][05794] Avg episode reward: [(0, '49.785')] -[2024-07-05 12:29:50,116][06021] Updated weights for policy 0, policy_version 22974 (0.0012) -[2024-07-05 12:29:52,208][06021] Updated weights for policy 0, policy_version 22984 (0.0009) -[2024-07-05 12:29:54,457][06021] Updated weights for policy 0, policy_version 22994 (0.0012) -[2024-07-05 12:29:54,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37414.3, 300 sec: 37377.8). Total num frames: 168378368. Throughput: 0: 9354.2. Samples: 4472724. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:29:54,968][05794] Avg episode reward: [(0, '48.382')] -[2024-07-05 12:29:56,617][06021] Updated weights for policy 0, policy_version 23004 (0.0009) -[2024-07-05 12:29:58,827][06021] Updated weights for policy 0, policy_version 23014 (0.0009) -[2024-07-05 12:29:59,967][05794] Fps is (10 sec: 37682.7, 60 sec: 37410.0, 300 sec: 37377.7). Total num frames: 168566784. Throughput: 0: 9357.5. Samples: 4528840. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:29:59,968][05794] Avg episode reward: [(0, '47.221')] -[2024-07-05 12:30:00,994][06021] Updated weights for policy 0, policy_version 23024 (0.0009) -[2024-07-05 12:30:03,166][06021] Updated weights for policy 0, policy_version 23034 (0.0013) -[2024-07-05 12:30:04,967][05794] Fps is (10 sec: 37683.4, 60 sec: 37410.1, 300 sec: 37377.7). Total num frames: 168755200. Throughput: 0: 9348.4. Samples: 4584840. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:30:04,968][05794] Avg episode reward: [(0, '50.092')] -[2024-07-05 12:30:05,383][06021] Updated weights for policy 0, policy_version 23044 (0.0013) -[2024-07-05 12:30:07,560][06021] Updated weights for policy 0, policy_version 23054 (0.0009) -[2024-07-05 12:30:09,740][06021] Updated weights for policy 0, policy_version 23064 (0.0011) -[2024-07-05 12:30:09,969][05794] Fps is (10 sec: 36854.3, 60 sec: 37412.4, 300 sec: 37349.6). Total num frames: 168935424. Throughput: 0: 9334.5. Samples: 4612284. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:30:09,971][05794] Avg episode reward: [(0, '49.000')] -[2024-07-05 12:30:11,963][06021] Updated weights for policy 0, policy_version 23074 (0.0010) -[2024-07-05 12:30:14,162][06021] Updated weights for policy 0, policy_version 23084 (0.0010) -[2024-07-05 12:30:14,967][05794] Fps is (10 sec: 36863.8, 60 sec: 37410.1, 300 sec: 37350.0). Total num frames: 169123840. Throughput: 0: 9342.2. Samples: 4668624. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:30:14,968][05794] Avg episode reward: [(0, '49.914')] -[2024-07-05 12:30:16,370][06021] Updated weights for policy 0, policy_version 23094 (0.0009) -[2024-07-05 12:30:18,560][06021] Updated weights for policy 0, policy_version 23104 (0.0009) -[2024-07-05 12:30:19,967][05794] Fps is (10 sec: 37693.2, 60 sec: 37413.2, 300 sec: 37350.0). Total num frames: 169312256. Throughput: 0: 9339.1. Samples: 4724480. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:30:19,968][05794] Avg episode reward: [(0, '49.872')] -[2024-07-05 12:30:20,770][06021] Updated weights for policy 0, policy_version 23114 (0.0009) -[2024-07-05 12:30:22,951][06021] Updated weights for policy 0, policy_version 23124 (0.0009) -[2024-07-05 12:30:24,967][05794] Fps is (10 sec: 36864.0, 60 sec: 37273.5, 300 sec: 37349.9). Total num frames: 169492480. Throughput: 0: 9342.9. Samples: 4752172. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:30:24,968][05794] Avg episode reward: [(0, '52.202')] -[2024-07-05 12:30:25,189][06021] Updated weights for policy 0, policy_version 23134 (0.0011) -[2024-07-05 12:30:27,408][06021] Updated weights for policy 0, policy_version 23144 (0.0010) -[2024-07-05 12:30:29,575][06021] Updated weights for policy 0, policy_version 23154 (0.0011) -[2024-07-05 12:30:29,976][05794] Fps is (10 sec: 36830.1, 60 sec: 37267.8, 300 sec: 37348.8). Total num frames: 169680896. Throughput: 0: 9324.7. Samples: 4808084. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:30:29,978][05794] Avg episode reward: [(0, '48.895')] -[2024-07-05 12:30:31,754][06021] Updated weights for policy 0, policy_version 23164 (0.0009) -[2024-07-05 12:30:33,957][06021] Updated weights for policy 0, policy_version 23174 (0.0010) -[2024-07-05 12:30:34,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 169869312. Throughput: 0: 9344.7. Samples: 4864636. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:30:34,968][05794] Avg episode reward: [(0, '49.034')] -[2024-07-05 12:30:36,117][06021] Updated weights for policy 0, policy_version 23184 (0.0009) -[2024-07-05 12:30:38,289][06021] Updated weights for policy 0, policy_version 23194 (0.0010) -[2024-07-05 12:30:39,967][05794] Fps is (10 sec: 37718.4, 60 sec: 37410.2, 300 sec: 37350.0). Total num frames: 170057728. Throughput: 0: 9338.9. Samples: 4892976. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:30:39,968][05794] Avg episode reward: [(0, '49.023')] -[2024-07-05 12:30:40,511][06021] Updated weights for policy 0, policy_version 23204 (0.0010) -[2024-07-05 12:30:42,692][06021] Updated weights for policy 0, policy_version 23214 (0.0012) -[2024-07-05 12:30:44,844][06021] Updated weights for policy 0, policy_version 23224 (0.0013) -[2024-07-05 12:30:44,967][05794] Fps is (10 sec: 37683.4, 60 sec: 37410.1, 300 sec: 37350.0). Total num frames: 170246144. Throughput: 0: 9327.0. Samples: 4948552. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:30:44,968][05794] Avg episode reward: [(0, '50.440')] -[2024-07-05 12:30:47,045][06021] Updated weights for policy 0, policy_version 23234 (0.0013) -[2024-07-05 12:30:49,206][06021] Updated weights for policy 0, policy_version 23244 (0.0009) -[2024-07-05 12:30:49,967][05794] Fps is (10 sec: 37683.1, 60 sec: 37410.1, 300 sec: 37350.0). Total num frames: 170434560. Throughput: 0: 9338.8. Samples: 5005084. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:30:49,968][05794] Avg episode reward: [(0, '50.335')] -[2024-07-05 12:30:51,381][06021] Updated weights for policy 0, policy_version 23254 (0.0012) -[2024-07-05 12:30:53,611][06021] Updated weights for policy 0, policy_version 23264 (0.0008) -[2024-07-05 12:30:54,967][05794] Fps is (10 sec: 37683.7, 60 sec: 37410.2, 300 sec: 37378.5). Total num frames: 170622976. Throughput: 0: 9352.3. Samples: 5033112. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:30:54,968][05794] Avg episode reward: [(0, '49.626')] -[2024-07-05 12:30:55,788][06021] Updated weights for policy 0, policy_version 23274 (0.0012) -[2024-07-05 12:30:58,054][06021] Updated weights for policy 0, policy_version 23284 (0.0012) -[2024-07-05 12:30:59,967][05794] Fps is (10 sec: 36863.0, 60 sec: 37273.5, 300 sec: 37352.9). Total num frames: 170803200. Throughput: 0: 9341.2. Samples: 5088980. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:30:59,968][05794] Avg episode reward: [(0, '51.263')] -[2024-07-05 12:31:00,279][06021] Updated weights for policy 0, policy_version 23294 (0.0010) -[2024-07-05 12:31:02,439][06021] Updated weights for policy 0, policy_version 23304 (0.0009) -[2024-07-05 12:31:04,563][06021] Updated weights for policy 0, policy_version 23314 (0.0015) -[2024-07-05 12:31:04,973][05794] Fps is (10 sec: 36838.8, 60 sec: 37269.4, 300 sec: 37349.1). Total num frames: 170991616. Throughput: 0: 9344.4. Samples: 5145040. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:31:04,986][05794] Avg episode reward: [(0, '49.059')] -[2024-07-05 12:31:06,798][06021] Updated weights for policy 0, policy_version 23324 (0.0014) -[2024-07-05 12:31:08,971][06021] Updated weights for policy 0, policy_version 23334 (0.0009) -[2024-07-05 12:31:09,967][05794] Fps is (10 sec: 37684.2, 60 sec: 37411.8, 300 sec: 37350.0). Total num frames: 171180032. Throughput: 0: 9350.9. Samples: 5172964. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:31:09,968][05794] Avg episode reward: [(0, '50.625')] -[2024-07-05 12:31:11,253][06021] Updated weights for policy 0, policy_version 23344 (0.0011) -[2024-07-05 12:31:13,439][06021] Updated weights for policy 0, policy_version 23354 (0.0010) -[2024-07-05 12:31:14,967][05794] Fps is (10 sec: 37708.8, 60 sec: 37410.2, 300 sec: 37350.0). Total num frames: 171368448. Throughput: 0: 9349.5. Samples: 5228724. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:31:14,968][05794] Avg episode reward: [(0, '51.782')] -[2024-07-05 12:31:15,640][06021] Updated weights for policy 0, policy_version 23364 (0.0011) -[2024-07-05 12:31:17,815][06021] Updated weights for policy 0, policy_version 23374 (0.0009) -[2024-07-05 12:31:19,975][05794] Fps is (10 sec: 36840.1, 60 sec: 37269.6, 300 sec: 37350.1). Total num frames: 171548672. Throughput: 0: 9319.7. Samples: 5284084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:31:19,979][05794] Avg episode reward: [(0, '49.505')] -[2024-07-05 12:31:20,033][06021] Updated weights for policy 0, policy_version 23384 (0.0010) -[2024-07-05 12:31:22,201][06021] Updated weights for policy 0, policy_version 23394 (0.0010) -[2024-07-05 12:31:24,408][06021] Updated weights for policy 0, policy_version 23404 (0.0013) -[2024-07-05 12:31:24,967][05794] Fps is (10 sec: 36863.6, 60 sec: 37410.1, 300 sec: 37322.2). Total num frames: 171737088. Throughput: 0: 9322.3. Samples: 5312480. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:31:24,968][05794] Avg episode reward: [(0, '49.285')] -[2024-07-05 12:31:26,648][06021] Updated weights for policy 0, policy_version 23414 (0.0010) -[2024-07-05 12:31:28,834][06021] Updated weights for policy 0, policy_version 23424 (0.0013) -[2024-07-05 12:31:29,967][05794] Fps is (10 sec: 37707.9, 60 sec: 37416.0, 300 sec: 37350.8). Total num frames: 171925504. Throughput: 0: 9318.3. Samples: 5367876. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:31:29,968][05794] Avg episode reward: [(0, '47.554')] -[2024-07-05 12:31:31,027][06021] Updated weights for policy 0, policy_version 23434 (0.0009) -[2024-07-05 12:31:33,235][06021] Updated weights for policy 0, policy_version 23444 (0.0009) -[2024-07-05 12:31:34,967][05794] Fps is (10 sec: 36863.7, 60 sec: 37273.5, 300 sec: 37322.2). Total num frames: 172105728. Throughput: 0: 9307.5. Samples: 5423924. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:31:34,968][05794] Avg episode reward: [(0, '48.862')] -[2024-07-05 12:31:34,974][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000023452_172113920.pth... -[2024-07-05 12:31:35,058][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000022358_163151872.pth -[2024-07-05 12:31:35,415][06021] Updated weights for policy 0, policy_version 23454 (0.0010) -[2024-07-05 12:31:37,587][06021] Updated weights for policy 0, policy_version 23464 (0.0015) -[2024-07-05 12:31:39,841][06021] Updated weights for policy 0, policy_version 23474 (0.0009) -[2024-07-05 12:31:39,974][05794] Fps is (10 sec: 36841.1, 60 sec: 37269.8, 300 sec: 37321.4). Total num frames: 172294144. Throughput: 0: 9310.3. Samples: 5452136. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:31:39,991][05794] Avg episode reward: [(0, '49.446')] -[2024-07-05 12:31:42,002][06021] Updated weights for policy 0, policy_version 23484 (0.0012) -[2024-07-05 12:31:44,205][06021] Updated weights for policy 0, policy_version 23494 (0.0009) -[2024-07-05 12:31:44,967][05794] Fps is (10 sec: 37682.8, 60 sec: 37273.4, 300 sec: 37322.2). Total num frames: 172482560. Throughput: 0: 9309.6. Samples: 5507912. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:31:44,968][05794] Avg episode reward: [(0, '47.693')] -[2024-07-05 12:31:46,391][06021] Updated weights for policy 0, policy_version 23504 (0.0012) -[2024-07-05 12:31:48,567][06021] Updated weights for policy 0, policy_version 23514 (0.0009) -[2024-07-05 12:31:49,967][05794] Fps is (10 sec: 37706.5, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 172670976. Throughput: 0: 9320.4. Samples: 5564396. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:31:49,968][05794] Avg episode reward: [(0, '51.959')] -[2024-07-05 12:31:50,770][06021] Updated weights for policy 0, policy_version 23524 (0.0009) -[2024-07-05 12:31:52,953][06021] Updated weights for policy 0, policy_version 23534 (0.0009) -[2024-07-05 12:31:54,967][05794] Fps is (10 sec: 37684.3, 60 sec: 37273.5, 300 sec: 37322.2). Total num frames: 172859392. Throughput: 0: 9308.4. Samples: 5591840. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:31:54,967][05794] Avg episode reward: [(0, '53.220')] -[2024-07-05 12:31:55,191][06021] Updated weights for policy 0, policy_version 23544 (0.0010) -[2024-07-05 12:31:57,365][06021] Updated weights for policy 0, policy_version 23554 (0.0009) -[2024-07-05 12:31:59,548][06021] Updated weights for policy 0, policy_version 23564 (0.0009) -[2024-07-05 12:31:59,967][05794] Fps is (10 sec: 36862.6, 60 sec: 37273.5, 300 sec: 37322.2). Total num frames: 173039616. Throughput: 0: 9324.1. Samples: 5648312. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:31:59,968][05794] Avg episode reward: [(0, '52.150')] -[2024-07-05 12:32:01,711][06021] Updated weights for policy 0, policy_version 23574 (0.0010) -[2024-07-05 12:32:03,944][06021] Updated weights for policy 0, policy_version 23584 (0.0010) -[2024-07-05 12:32:04,967][05794] Fps is (10 sec: 36863.7, 60 sec: 37277.8, 300 sec: 37322.2). Total num frames: 173228032. Throughput: 0: 9328.2. Samples: 5703792. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:32:04,969][05794] Avg episode reward: [(0, '53.104')] -[2024-07-05 12:32:06,097][06021] Updated weights for policy 0, policy_version 23594 (0.0009) -[2024-07-05 12:32:08,353][06021] Updated weights for policy 0, policy_version 23604 (0.0010) -[2024-07-05 12:32:09,967][05794] Fps is (10 sec: 37684.7, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 173416448. Throughput: 0: 9327.9. Samples: 5732236. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:32:09,968][05794] Avg episode reward: [(0, '50.189')] -[2024-07-05 12:32:10,538][06021] Updated weights for policy 0, policy_version 23614 (0.0014) -[2024-07-05 12:32:12,720][06021] Updated weights for policy 0, policy_version 23624 (0.0013) -[2024-07-05 12:32:14,951][06021] Updated weights for policy 0, policy_version 23634 (0.0011) -[2024-07-05 12:32:14,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37273.5, 300 sec: 37322.2). Total num frames: 173604864. Throughput: 0: 9334.3. Samples: 5787920. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:32:14,968][05794] Avg episode reward: [(0, '51.803')] -[2024-07-05 12:32:17,156][06021] Updated weights for policy 0, policy_version 23644 (0.0012) -[2024-07-05 12:32:19,291][06021] Updated weights for policy 0, policy_version 23654 (0.0009) -[2024-07-05 12:32:19,967][05794] Fps is (10 sec: 37683.2, 60 sec: 37414.2, 300 sec: 37350.0). Total num frames: 173793280. Throughput: 0: 9337.5. Samples: 5844108. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:32:19,968][05794] Avg episode reward: [(0, '51.942')] -[2024-07-05 12:32:21,480][06021] Updated weights for policy 0, policy_version 23664 (0.0014) -[2024-07-05 12:32:23,679][06021] Updated weights for policy 0, policy_version 23674 (0.0008) -[2024-07-05 12:32:24,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 173973504. Throughput: 0: 9330.3. Samples: 5871944. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:32:24,968][05794] Avg episode reward: [(0, '50.867')] -[2024-07-05 12:32:25,919][06021] Updated weights for policy 0, policy_version 23684 (0.0009) -[2024-07-05 12:32:28,130][06021] Updated weights for policy 0, policy_version 23694 (0.0009) -[2024-07-05 12:32:29,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 174161920. Throughput: 0: 9331.3. Samples: 5927820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:32:29,968][05794] Avg episode reward: [(0, '47.883')] -[2024-07-05 12:32:30,318][06021] Updated weights for policy 0, policy_version 23704 (0.0009) -[2024-07-05 12:32:32,476][06021] Updated weights for policy 0, policy_version 23714 (0.0009) -[2024-07-05 12:32:34,656][06021] Updated weights for policy 0, policy_version 23724 (0.0011) -[2024-07-05 12:32:34,967][05794] Fps is (10 sec: 37683.6, 60 sec: 37410.3, 300 sec: 37350.0). Total num frames: 174350336. Throughput: 0: 9320.7. Samples: 5983828. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:32:34,968][05794] Avg episode reward: [(0, '51.555')] -[2024-07-05 12:32:36,855][06021] Updated weights for policy 0, policy_version 23734 (0.0009) -[2024-07-05 12:32:39,028][06021] Updated weights for policy 0, policy_version 23744 (0.0010) -[2024-07-05 12:32:39,967][05794] Fps is (10 sec: 37683.1, 60 sec: 37414.0, 300 sec: 37350.0). Total num frames: 174538752. Throughput: 0: 9328.2. Samples: 6011608. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:32:39,968][05794] Avg episode reward: [(0, '49.723')] -[2024-07-05 12:32:41,247][06021] Updated weights for policy 0, policy_version 23754 (0.0015) -[2024-07-05 12:32:43,469][06021] Updated weights for policy 0, policy_version 23764 (0.0013) -[2024-07-05 12:32:44,972][05794] Fps is (10 sec: 36845.9, 60 sec: 37270.7, 300 sec: 37321.6). Total num frames: 174718976. Throughput: 0: 9324.5. Samples: 6067956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:32:44,974][05794] Avg episode reward: [(0, '50.732')] -[2024-07-05 12:32:45,676][06021] Updated weights for policy 0, policy_version 23774 (0.0010) -[2024-07-05 12:32:47,871][06021] Updated weights for policy 0, policy_version 23784 (0.0010) -[2024-07-05 12:32:49,967][05794] Fps is (10 sec: 36864.3, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 174907392. Throughput: 0: 9334.0. Samples: 6123820. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:32:49,968][05794] Avg episode reward: [(0, '49.820')] -[2024-07-05 12:32:50,053][06021] Updated weights for policy 0, policy_version 23794 (0.0012) -[2024-07-05 12:32:52,207][06021] Updated weights for policy 0, policy_version 23804 (0.0010) -[2024-07-05 12:32:54,388][06021] Updated weights for policy 0, policy_version 23814 (0.0013) -[2024-07-05 12:32:54,967][05794] Fps is (10 sec: 37701.6, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 175095808. Throughput: 0: 9329.7. Samples: 6152072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:32:54,968][05794] Avg episode reward: [(0, '54.468')] -[2024-07-05 12:32:54,972][06001] Saving new best policy, reward=54.468! -[2024-07-05 12:32:56,674][06021] Updated weights for policy 0, policy_version 23824 (0.0009) -[2024-07-05 12:32:58,868][06021] Updated weights for policy 0, policy_version 23834 (0.0010) -[2024-07-05 12:32:59,978][05794] Fps is (10 sec: 36831.3, 60 sec: 37268.4, 300 sec: 37293.3). Total num frames: 175276032. Throughput: 0: 9321.6. Samples: 6207472. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:32:59,982][05794] Avg episode reward: [(0, '48.488')] -[2024-07-05 12:33:01,043][06021] Updated weights for policy 0, policy_version 23844 (0.0010) -[2024-07-05 12:33:03,229][06021] Updated weights for policy 0, policy_version 23854 (0.0010) -[2024-07-05 12:33:04,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37273.7, 300 sec: 37322.2). Total num frames: 175464448. Throughput: 0: 9310.1. Samples: 6263064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:33:04,967][05794] Avg episode reward: [(0, '51.949')] -[2024-07-05 12:33:05,480][06021] Updated weights for policy 0, policy_version 23864 (0.0010) -[2024-07-05 12:33:07,671][06021] Updated weights for policy 0, policy_version 23874 (0.0010) -[2024-07-05 12:33:09,912][06021] Updated weights for policy 0, policy_version 23884 (0.0012) -[2024-07-05 12:33:09,967][05794] Fps is (10 sec: 37716.5, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 175652864. Throughput: 0: 9323.0. Samples: 6291480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:33:09,968][05794] Avg episode reward: [(0, '48.479')] -[2024-07-05 12:33:12,127][06021] Updated weights for policy 0, policy_version 23894 (0.0013) -[2024-07-05 12:33:14,267][06021] Updated weights for policy 0, policy_version 23904 (0.0013) -[2024-07-05 12:33:14,967][05794] Fps is (10 sec: 37683.1, 60 sec: 37273.7, 300 sec: 37322.2). Total num frames: 175841280. Throughput: 0: 9322.4. Samples: 6347328. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:33:14,968][05794] Avg episode reward: [(0, '48.865')] -[2024-07-05 12:33:16,453][06021] Updated weights for policy 0, policy_version 23914 (0.0010) -[2024-07-05 12:33:18,626][06021] Updated weights for policy 0, policy_version 23924 (0.0010) -[2024-07-05 12:33:19,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 176029696. Throughput: 0: 9321.3. Samples: 6403288. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:33:19,968][05794] Avg episode reward: [(0, '52.009')] -[2024-07-05 12:33:20,855][06021] Updated weights for policy 0, policy_version 23934 (0.0010) -[2024-07-05 12:33:23,087][06021] Updated weights for policy 0, policy_version 23944 (0.0010) -[2024-07-05 12:33:24,967][05794] Fps is (10 sec: 36863.4, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 176209920. Throughput: 0: 9317.3. Samples: 6430888. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:33:24,968][05794] Avg episode reward: [(0, '51.052')] -[2024-07-05 12:33:25,312][06021] Updated weights for policy 0, policy_version 23954 (0.0009) -[2024-07-05 12:33:27,515][06021] Updated weights for policy 0, policy_version 23964 (0.0012) -[2024-07-05 12:33:29,789][06021] Updated weights for policy 0, policy_version 23974 (0.0010) -[2024-07-05 12:33:29,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37273.6, 300 sec: 37323.4). Total num frames: 176398336. Throughput: 0: 9303.9. Samples: 6486588. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:33:29,968][05794] Avg episode reward: [(0, '49.972')] -[2024-07-05 12:33:32,004][06021] Updated weights for policy 0, policy_version 23984 (0.0009) -[2024-07-05 12:33:34,176][06021] Updated weights for policy 0, policy_version 23994 (0.0009) -[2024-07-05 12:33:34,967][05794] Fps is (10 sec: 36864.5, 60 sec: 37137.0, 300 sec: 37294.4). Total num frames: 176578560. Throughput: 0: 9280.1. Samples: 6541424. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:33:34,968][05794] Avg episode reward: [(0, '49.731')] -[2024-07-05 12:33:35,039][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000023998_176586752.pth... -[2024-07-05 12:33:35,122][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000022905_167632896.pth -[2024-07-05 12:33:36,354][06021] Updated weights for policy 0, policy_version 24004 (0.0009) -[2024-07-05 12:33:38,568][06021] Updated weights for policy 0, policy_version 24014 (0.0010) -[2024-07-05 12:33:39,967][05794] Fps is (10 sec: 36864.0, 60 sec: 37137.1, 300 sec: 37294.4). Total num frames: 176766976. Throughput: 0: 9276.3. Samples: 6569504. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:33:39,968][05794] Avg episode reward: [(0, '49.976')] -[2024-07-05 12:33:40,801][06021] Updated weights for policy 0, policy_version 24024 (0.0012) -[2024-07-05 12:33:43,052][06021] Updated weights for policy 0, policy_version 24034 (0.0009) -[2024-07-05 12:33:44,974][05794] Fps is (10 sec: 36837.5, 60 sec: 37135.6, 300 sec: 37265.7). Total num frames: 176947200. Throughput: 0: 9273.8. Samples: 6624780. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:33:44,977][05794] Avg episode reward: [(0, '51.522')] -[2024-07-05 12:33:45,253][06021] Updated weights for policy 0, policy_version 24044 (0.0011) -[2024-07-05 12:33:47,431][06021] Updated weights for policy 0, policy_version 24054 (0.0012) -[2024-07-05 12:33:49,639][06021] Updated weights for policy 0, policy_version 24064 (0.0012) -[2024-07-05 12:33:49,967][05794] Fps is (10 sec: 36864.3, 60 sec: 37137.1, 300 sec: 37295.3). Total num frames: 177135616. Throughput: 0: 9284.0. Samples: 6680844. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:33:49,968][05794] Avg episode reward: [(0, '50.955')] -[2024-07-05 12:33:51,842][06021] Updated weights for policy 0, policy_version 24074 (0.0009) -[2024-07-05 12:33:54,014][06021] Updated weights for policy 0, policy_version 24084 (0.0012) -[2024-07-05 12:33:54,967][05794] Fps is (10 sec: 37710.3, 60 sec: 37137.0, 300 sec: 37294.4). Total num frames: 177324032. Throughput: 0: 9267.7. Samples: 6708528. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:33:54,968][05794] Avg episode reward: [(0, '50.209')] -[2024-07-05 12:33:56,235][06021] Updated weights for policy 0, policy_version 24094 (0.0009) -[2024-07-05 12:33:58,405][06021] Updated weights for policy 0, policy_version 24104 (0.0016) -[2024-07-05 12:33:59,973][05794] Fps is (10 sec: 36840.9, 60 sec: 37138.7, 300 sec: 37265.9). Total num frames: 177504256. Throughput: 0: 9276.0. Samples: 6764804. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:33:59,974][05794] Avg episode reward: [(0, '51.056')] -[2024-07-05 12:34:00,633][06021] Updated weights for policy 0, policy_version 24114 (0.0010) -[2024-07-05 12:34:02,810][06021] Updated weights for policy 0, policy_version 24124 (0.0011) -[2024-07-05 12:34:04,967][05794] Fps is (10 sec: 36863.5, 60 sec: 37136.9, 300 sec: 37295.2). Total num frames: 177692672. Throughput: 0: 9261.1. Samples: 6820040. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:04,968][05794] Avg episode reward: [(0, '50.283')] -[2024-07-05 12:34:05,077][06021] Updated weights for policy 0, policy_version 24134 (0.0010) -[2024-07-05 12:34:07,249][06021] Updated weights for policy 0, policy_version 24144 (0.0009) -[2024-07-05 12:34:09,464][06021] Updated weights for policy 0, policy_version 24154 (0.0009) -[2024-07-05 12:34:09,967][05794] Fps is (10 sec: 37705.7, 60 sec: 37136.9, 300 sec: 37294.4). Total num frames: 177881088. Throughput: 0: 9265.5. Samples: 6847836. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:09,968][05794] Avg episode reward: [(0, '48.240')] -[2024-07-05 12:34:11,741][06021] Updated weights for policy 0, policy_version 24164 (0.0009) -[2024-07-05 12:34:13,950][06021] Updated weights for policy 0, policy_version 24174 (0.0009) -[2024-07-05 12:34:14,967][05794] Fps is (10 sec: 36864.5, 60 sec: 37000.5, 300 sec: 37267.3). Total num frames: 178061312. Throughput: 0: 9260.8. Samples: 6903324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:14,968][05794] Avg episode reward: [(0, '50.776')] -[2024-07-05 12:34:16,156][06021] Updated weights for policy 0, policy_version 24184 (0.0008) -[2024-07-05 12:34:18,339][06021] Updated weights for policy 0, policy_version 24194 (0.0009) -[2024-07-05 12:34:19,967][05794] Fps is (10 sec: 36864.9, 60 sec: 37000.6, 300 sec: 37266.7). Total num frames: 178249728. Throughput: 0: 9282.8. Samples: 6959152. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:19,968][05794] Avg episode reward: [(0, '50.642')] -[2024-07-05 12:34:20,530][06021] Updated weights for policy 0, policy_version 24204 (0.0009) -[2024-07-05 12:34:22,697][06021] Updated weights for policy 0, policy_version 24214 (0.0013) -[2024-07-05 12:34:24,900][06021] Updated weights for policy 0, policy_version 24224 (0.0009) -[2024-07-05 12:34:24,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37137.1, 300 sec: 37266.6). Total num frames: 178438144. Throughput: 0: 9282.4. Samples: 6987212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:24,968][05794] Avg episode reward: [(0, '48.180')] -[2024-07-05 12:34:27,125][06021] Updated weights for policy 0, policy_version 24234 (0.0009) -[2024-07-05 12:34:29,347][06021] Updated weights for policy 0, policy_version 24244 (0.0010) -[2024-07-05 12:34:29,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37000.6, 300 sec: 37238.9). Total num frames: 178618368. Throughput: 0: 9284.0. Samples: 7042492. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:29,968][05794] Avg episode reward: [(0, '50.111')] -[2024-07-05 12:34:31,546][06021] Updated weights for policy 0, policy_version 24254 (0.0010) -[2024-07-05 12:34:33,792][06021] Updated weights for policy 0, policy_version 24264 (0.0009) -[2024-07-05 12:34:34,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37137.0, 300 sec: 37266.6). Total num frames: 178806784. Throughput: 0: 9277.4. Samples: 7098328. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:34:34,968][05794] Avg episode reward: [(0, '50.361')] -[2024-07-05 12:34:36,038][06021] Updated weights for policy 0, policy_version 24274 (0.0012) -[2024-07-05 12:34:38,254][06021] Updated weights for policy 0, policy_version 24284 (0.0011) -[2024-07-05 12:34:39,975][05794] Fps is (10 sec: 36836.6, 60 sec: 36996.0, 300 sec: 37238.0). Total num frames: 178987008. Throughput: 0: 9267.6. Samples: 7125636. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:34:39,977][05794] Avg episode reward: [(0, '49.022')] -[2024-07-05 12:34:40,434][06021] Updated weights for policy 0, policy_version 24294 (0.0010) -[2024-07-05 12:34:42,681][06021] Updated weights for policy 0, policy_version 24304 (0.0012) -[2024-07-05 12:34:44,846][06021] Updated weights for policy 0, policy_version 24314 (0.0010) -[2024-07-05 12:34:44,975][05794] Fps is (10 sec: 36832.4, 60 sec: 37136.1, 300 sec: 37237.8). Total num frames: 179175424. Throughput: 0: 9262.9. Samples: 7181660. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:34:44,988][05794] Avg episode reward: [(0, '51.836')] -[2024-07-05 12:34:47,044][06021] Updated weights for policy 0, policy_version 24324 (0.0010) -[2024-07-05 12:34:49,232][06021] Updated weights for policy 0, policy_version 24334 (0.0010) -[2024-07-05 12:34:49,967][05794] Fps is (10 sec: 37711.1, 60 sec: 37137.0, 300 sec: 37238.9). Total num frames: 179363840. Throughput: 0: 9279.9. Samples: 7237632. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:34:49,968][05794] Avg episode reward: [(0, '48.007')] -[2024-07-05 12:34:51,422][06021] Updated weights for policy 0, policy_version 24344 (0.0009) -[2024-07-05 12:34:53,612][06021] Updated weights for policy 0, policy_version 24354 (0.0011) -[2024-07-05 12:34:54,967][05794] Fps is (10 sec: 37716.1, 60 sec: 37137.1, 300 sec: 37238.9). Total num frames: 179552256. Throughput: 0: 9278.1. Samples: 7265348. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:34:54,968][05794] Avg episode reward: [(0, '47.451')] -[2024-07-05 12:34:55,810][06021] Updated weights for policy 0, policy_version 24364 (0.0012) -[2024-07-05 12:34:57,999][06021] Updated weights for policy 0, policy_version 24374 (0.0009) -[2024-07-05 12:34:59,967][05794] Fps is (10 sec: 36862.2, 60 sec: 37140.6, 300 sec: 37211.1). Total num frames: 179732480. Throughput: 0: 9288.5. Samples: 7321312. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:34:59,968][05794] Avg episode reward: [(0, '50.141')] -[2024-07-05 12:35:00,229][06021] Updated weights for policy 0, policy_version 24384 (0.0010) -[2024-07-05 12:35:02,434][06021] Updated weights for policy 0, policy_version 24394 (0.0012) -[2024-07-05 12:35:04,615][06021] Updated weights for policy 0, policy_version 24404 (0.0009) -[2024-07-05 12:35:04,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37137.2, 300 sec: 37239.2). Total num frames: 179920896. Throughput: 0: 9291.7. Samples: 7377276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:35:04,968][05794] Avg episode reward: [(0, '48.997')] -[2024-07-05 12:35:06,792][06021] Updated weights for policy 0, policy_version 24414 (0.0009) -[2024-07-05 12:35:09,009][06021] Updated weights for policy 0, policy_version 24424 (0.0010) -[2024-07-05 12:35:09,967][05794] Fps is (10 sec: 37685.1, 60 sec: 37137.2, 300 sec: 37238.9). Total num frames: 180109312. Throughput: 0: 9285.0. Samples: 7405036. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:35:09,968][05794] Avg episode reward: [(0, '49.937')] -[2024-07-05 12:35:11,194][06021] Updated weights for policy 0, policy_version 24434 (0.0013) -[2024-07-05 12:35:13,470][06021] Updated weights for policy 0, policy_version 24444 (0.0011) -[2024-07-05 12:35:14,967][05794] Fps is (10 sec: 36864.3, 60 sec: 37137.2, 300 sec: 37211.1). Total num frames: 180289536. Throughput: 0: 9289.3. Samples: 7460512. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:35:14,968][05794] Avg episode reward: [(0, '48.315')] -[2024-07-05 12:35:15,690][06021] Updated weights for policy 0, policy_version 24454 (0.0014) -[2024-07-05 12:35:17,924][06021] Updated weights for policy 0, policy_version 24464 (0.0009) -[2024-07-05 12:35:19,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37137.1, 300 sec: 37238.9). Total num frames: 180477952. Throughput: 0: 9283.6. Samples: 7516088. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:35:19,968][05794] Avg episode reward: [(0, '49.286')] -[2024-07-05 12:35:20,138][06021] Updated weights for policy 0, policy_version 24474 (0.0012) -[2024-07-05 12:35:22,333][06021] Updated weights for policy 0, policy_version 24484 (0.0009) -[2024-07-05 12:35:24,535][06021] Updated weights for policy 0, policy_version 24494 (0.0009) -[2024-07-05 12:35:24,967][05794] Fps is (10 sec: 37683.0, 60 sec: 37137.2, 300 sec: 37240.1). Total num frames: 180666368. Throughput: 0: 9289.9. Samples: 7543612. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:35:24,968][05794] Avg episode reward: [(0, '52.115')] -[2024-07-05 12:35:26,717][06021] Updated weights for policy 0, policy_version 24504 (0.0009) -[2024-07-05 12:35:28,960][06021] Updated weights for policy 0, policy_version 24514 (0.0009) -[2024-07-05 12:35:29,967][05794] Fps is (10 sec: 36863.3, 60 sec: 37136.9, 300 sec: 37211.1). Total num frames: 180846592. Throughput: 0: 9286.3. Samples: 7599464. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:35:29,968][05794] Avg episode reward: [(0, '49.082')] -[2024-07-05 12:35:31,157][06021] Updated weights for policy 0, policy_version 24524 (0.0010) -[2024-07-05 12:35:33,345][06021] Updated weights for policy 0, policy_version 24534 (0.0009) -[2024-07-05 12:35:34,967][05794] Fps is (10 sec: 36863.8, 60 sec: 37137.2, 300 sec: 37211.1). Total num frames: 181035008. Throughput: 0: 9276.8. Samples: 7655088. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:35:34,968][05794] Avg episode reward: [(0, '47.886')] -[2024-07-05 12:35:34,973][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000024541_181035008.pth... -[2024-07-05 12:35:35,066][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000023452_172113920.pth -[2024-07-05 12:35:35,616][06021] Updated weights for policy 0, policy_version 24544 (0.0010) -[2024-07-05 12:35:37,787][06021] Updated weights for policy 0, policy_version 24554 (0.0009) -[2024-07-05 12:35:39,974][05794] Fps is (10 sec: 36839.4, 60 sec: 37137.4, 300 sec: 37182.5). Total num frames: 181215232. Throughput: 0: 9271.3. Samples: 7682620. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:35:39,976][05794] Avg episode reward: [(0, '49.982')] -[2024-07-05 12:35:40,023][06021] Updated weights for policy 0, policy_version 24564 (0.0009) -[2024-07-05 12:35:42,211][06021] Updated weights for policy 0, policy_version 24574 (0.0009) -[2024-07-05 12:35:44,382][06021] Updated weights for policy 0, policy_version 24584 (0.0011) -[2024-07-05 12:35:44,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37142.4, 300 sec: 37183.3). Total num frames: 181403648. Throughput: 0: 9267.7. Samples: 7738356. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:35:44,968][05794] Avg episode reward: [(0, '47.100')] -[2024-07-05 12:35:46,580][06021] Updated weights for policy 0, policy_version 24594 (0.0011) -[2024-07-05 12:35:48,788][06021] Updated weights for policy 0, policy_version 24604 (0.0010) -[2024-07-05 12:35:49,967][05794] Fps is (10 sec: 37708.3, 60 sec: 37136.9, 300 sec: 37183.3). Total num frames: 181592064. Throughput: 0: 9262.9. Samples: 7794108. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:35:49,968][05794] Avg episode reward: [(0, '51.212')] -[2024-07-05 12:35:51,016][06021] Updated weights for policy 0, policy_version 24614 (0.0012) -[2024-07-05 12:35:53,253][06021] Updated weights for policy 0, policy_version 24624 (0.0009) -[2024-07-05 12:35:54,967][05794] Fps is (10 sec: 36862.6, 60 sec: 37000.3, 300 sec: 37183.3). Total num frames: 181772288. Throughput: 0: 9260.1. Samples: 7821744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:35:54,968][05794] Avg episode reward: [(0, '51.111')] -[2024-07-05 12:35:55,417][06021] Updated weights for policy 0, policy_version 24634 (0.0009) -[2024-07-05 12:35:57,609][06021] Updated weights for policy 0, policy_version 24644 (0.0011) -[2024-07-05 12:35:59,775][06021] Updated weights for policy 0, policy_version 24654 (0.0012) -[2024-07-05 12:35:59,967][05794] Fps is (10 sec: 36864.4, 60 sec: 37137.3, 300 sec: 37184.2). Total num frames: 181960704. Throughput: 0: 9273.1. Samples: 7877804. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:35:59,968][05794] Avg episode reward: [(0, '50.177')] -[2024-07-05 12:36:02,031][06021] Updated weights for policy 0, policy_version 24664 (0.0009) -[2024-07-05 12:36:04,204][06021] Updated weights for policy 0, policy_version 24674 (0.0010) -[2024-07-05 12:36:04,967][05794] Fps is (10 sec: 37684.8, 60 sec: 37137.1, 300 sec: 37183.4). Total num frames: 182149120. Throughput: 0: 9281.2. Samples: 7933744. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:36:04,968][05794] Avg episode reward: [(0, '48.919')] -[2024-07-05 12:36:06,427][06021] Updated weights for policy 0, policy_version 24684 (0.0009) -[2024-07-05 12:36:08,632][06021] Updated weights for policy 0, policy_version 24694 (0.0009) -[2024-07-05 12:36:09,967][05794] Fps is (10 sec: 37683.6, 60 sec: 37137.1, 300 sec: 37183.3). Total num frames: 182337536. Throughput: 0: 9278.8. Samples: 7961160. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:36:09,968][05794] Avg episode reward: [(0, '47.579')] -[2024-07-05 12:36:10,871][06021] Updated weights for policy 0, policy_version 24704 (0.0012) -[2024-07-05 12:36:13,092][06021] Updated weights for policy 0, policy_version 24714 (0.0009) -[2024-07-05 12:36:14,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37137.0, 300 sec: 37184.2). Total num frames: 182517760. Throughput: 0: 9267.3. Samples: 8016492. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:36:14,968][05794] Avg episode reward: [(0, '50.274')] -[2024-07-05 12:36:15,314][06021] Updated weights for policy 0, policy_version 24724 (0.0010) -[2024-07-05 12:36:17,534][06021] Updated weights for policy 0, policy_version 24734 (0.0009) -[2024-07-05 12:36:19,722][06021] Updated weights for policy 0, policy_version 24744 (0.0009) -[2024-07-05 12:36:19,967][05794] Fps is (10 sec: 36863.6, 60 sec: 37137.0, 300 sec: 37183.3). Total num frames: 182706176. Throughput: 0: 9271.4. Samples: 8072304. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:36:19,968][05794] Avg episode reward: [(0, '50.863')] -[2024-07-05 12:36:21,957][06021] Updated weights for policy 0, policy_version 24754 (0.0010) -[2024-07-05 12:36:24,150][06021] Updated weights for policy 0, policy_version 24764 (0.0011) -[2024-07-05 12:36:24,969][05794] Fps is (10 sec: 36857.4, 60 sec: 36999.4, 300 sec: 37155.3). Total num frames: 182886400. Throughput: 0: 9282.0. Samples: 8100264. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:36:24,982][05794] Avg episode reward: [(0, '51.812')] -[2024-07-05 12:36:26,387][06021] Updated weights for policy 0, policy_version 24774 (0.0009) -[2024-07-05 12:36:28,585][06021] Updated weights for policy 0, policy_version 24784 (0.0009) -[2024-07-05 12:36:29,967][05794] Fps is (10 sec: 36863.7, 60 sec: 37137.1, 300 sec: 37183.3). Total num frames: 183074816. Throughput: 0: 9270.7. Samples: 8155540. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:36:29,968][05794] Avg episode reward: [(0, '50.995')] -[2024-07-05 12:36:30,796][06021] Updated weights for policy 0, policy_version 24794 (0.0009) -[2024-07-05 12:36:33,055][06021] Updated weights for policy 0, policy_version 24804 (0.0009) -[2024-07-05 12:36:34,967][05794] Fps is (10 sec: 36869.3, 60 sec: 37000.3, 300 sec: 37156.3). Total num frames: 183255040. Throughput: 0: 9259.2. Samples: 8210772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:36:34,968][05794] Avg episode reward: [(0, '51.208')] -[2024-07-05 12:36:35,260][06021] Updated weights for policy 0, policy_version 24814 (0.0012) -[2024-07-05 12:36:37,467][06021] Updated weights for policy 0, policy_version 24824 (0.0009) -[2024-07-05 12:36:39,672][06021] Updated weights for policy 0, policy_version 24834 (0.0012) -[2024-07-05 12:36:39,967][05794] Fps is (10 sec: 36864.4, 60 sec: 37141.3, 300 sec: 37155.6). Total num frames: 183443456. Throughput: 0: 9273.9. Samples: 8239064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:36:39,969][05794] Avg episode reward: [(0, '50.935')] -[2024-07-05 12:36:41,909][06021] Updated weights for policy 0, policy_version 24844 (0.0009) -[2024-07-05 12:36:44,085][06021] Updated weights for policy 0, policy_version 24854 (0.0008) -[2024-07-05 12:36:44,967][05794] Fps is (10 sec: 37684.5, 60 sec: 37137.1, 300 sec: 37155.6). Total num frames: 183631872. Throughput: 0: 9258.1. Samples: 8294416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:36:44,968][05794] Avg episode reward: [(0, '50.150')] -[2024-07-05 12:36:46,286][06021] Updated weights for policy 0, policy_version 24864 (0.0012) -[2024-07-05 12:36:48,487][06021] Updated weights for policy 0, policy_version 24874 (0.0009) -[2024-07-05 12:36:49,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37000.7, 300 sec: 37127.8). Total num frames: 183812096. Throughput: 0: 9244.0. Samples: 8349724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:36:49,968][05794] Avg episode reward: [(0, '50.214')] -[2024-07-05 12:36:50,721][06021] Updated weights for policy 0, policy_version 24884 (0.0010) -[2024-07-05 12:36:52,933][06021] Updated weights for policy 0, policy_version 24894 (0.0009) -[2024-07-05 12:36:54,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37137.3, 300 sec: 37155.6). Total num frames: 184000512. Throughput: 0: 9252.8. Samples: 8377536. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:36:54,968][05794] Avg episode reward: [(0, '52.260')] -[2024-07-05 12:36:55,196][06021] Updated weights for policy 0, policy_version 24904 (0.0010) -[2024-07-05 12:36:57,346][06021] Updated weights for policy 0, policy_version 24914 (0.0009) -[2024-07-05 12:36:59,621][06021] Updated weights for policy 0, policy_version 24924 (0.0009) -[2024-07-05 12:36:59,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37000.6, 300 sec: 37127.8). Total num frames: 184180736. Throughput: 0: 9258.0. Samples: 8433100. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:36:59,968][05794] Avg episode reward: [(0, '48.859')] -[2024-07-05 12:37:01,844][06021] Updated weights for policy 0, policy_version 24934 (0.0014) -[2024-07-05 12:37:03,982][06021] Updated weights for policy 0, policy_version 24944 (0.0009) -[2024-07-05 12:37:04,967][05794] Fps is (10 sec: 36863.8, 60 sec: 37000.5, 300 sec: 37127.8). Total num frames: 184369152. Throughput: 0: 9258.1. Samples: 8488920. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:04,968][05794] Avg episode reward: [(0, '50.940')] -[2024-07-05 12:37:06,177][06021] Updated weights for policy 0, policy_version 24954 (0.0009) -[2024-07-05 12:37:08,456][06021] Updated weights for policy 0, policy_version 24964 (0.0012) -[2024-07-05 12:37:09,967][05794] Fps is (10 sec: 37682.4, 60 sec: 37000.4, 300 sec: 37127.8). Total num frames: 184557568. Throughput: 0: 9250.7. Samples: 8516528. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:09,968][05794] Avg episode reward: [(0, '52.019')] -[2024-07-05 12:37:10,654][06021] Updated weights for policy 0, policy_version 24974 (0.0010) -[2024-07-05 12:37:12,849][06021] Updated weights for policy 0, policy_version 24984 (0.0009) -[2024-07-05 12:37:14,969][05794] Fps is (10 sec: 36854.5, 60 sec: 36998.9, 300 sec: 37099.7). Total num frames: 184737792. Throughput: 0: 9259.2. Samples: 8572228. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:14,970][05794] Avg episode reward: [(0, '45.392')] -[2024-07-05 12:37:15,014][06021] Updated weights for policy 0, policy_version 24994 (0.0010) -[2024-07-05 12:37:17,253][06021] Updated weights for policy 0, policy_version 25004 (0.0010) -[2024-07-05 12:37:19,430][06021] Updated weights for policy 0, policy_version 25014 (0.0010) -[2024-07-05 12:37:19,967][05794] Fps is (10 sec: 36864.6, 60 sec: 37000.6, 300 sec: 37127.8). Total num frames: 184926208. Throughput: 0: 9274.8. Samples: 8628136. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:19,968][05794] Avg episode reward: [(0, '52.309')] -[2024-07-05 12:37:21,603][06021] Updated weights for policy 0, policy_version 25024 (0.0013) -[2024-07-05 12:37:23,840][06021] Updated weights for policy 0, policy_version 25034 (0.0010) -[2024-07-05 12:37:24,967][05794] Fps is (10 sec: 37693.0, 60 sec: 37138.2, 300 sec: 37127.8). Total num frames: 185114624. Throughput: 0: 9260.7. Samples: 8655796. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:37:24,968][05794] Avg episode reward: [(0, '52.206')] -[2024-07-05 12:37:26,090][06021] Updated weights for policy 0, policy_version 25044 (0.0010) -[2024-07-05 12:37:28,290][06021] Updated weights for policy 0, policy_version 25054 (0.0010) -[2024-07-05 12:37:29,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37000.6, 300 sec: 37100.0). Total num frames: 185294848. Throughput: 0: 9265.0. Samples: 8711340. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:37:29,968][05794] Avg episode reward: [(0, '51.718')] -[2024-07-05 12:37:30,496][06021] Updated weights for policy 0, policy_version 25064 (0.0009) -[2024-07-05 12:37:32,752][06021] Updated weights for policy 0, policy_version 25074 (0.0012) -[2024-07-05 12:37:34,975][05794] Fps is (10 sec: 36021.9, 60 sec: 36996.8, 300 sec: 37071.5). Total num frames: 185475072. Throughput: 0: 9267.1. Samples: 8766804. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:37:34,978][05794] Avg episode reward: [(0, '47.608')] -[2024-07-05 12:37:34,998][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000025084_185483264.pth... -[2024-07-05 12:37:35,004][06021] Updated weights for policy 0, policy_version 25084 (0.0009) -[2024-07-05 12:37:35,088][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000023998_176586752.pth -[2024-07-05 12:37:37,186][06021] Updated weights for policy 0, policy_version 25094 (0.0012) -[2024-07-05 12:37:39,385][06021] Updated weights for policy 0, policy_version 25104 (0.0009) -[2024-07-05 12:37:39,967][05794] Fps is (10 sec: 36864.1, 60 sec: 37000.6, 300 sec: 37100.7). Total num frames: 185663488. Throughput: 0: 9266.4. Samples: 8794524. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:39,968][05794] Avg episode reward: [(0, '48.741')] -[2024-07-05 12:37:41,652][06021] Updated weights for policy 0, policy_version 25114 (0.0009) -[2024-07-05 12:37:43,890][06021] Updated weights for policy 0, policy_version 25124 (0.0009) -[2024-07-05 12:37:44,967][05794] Fps is (10 sec: 36887.9, 60 sec: 36864.1, 300 sec: 37072.3). Total num frames: 185843712. Throughput: 0: 9255.7. Samples: 8849608. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:44,967][05794] Avg episode reward: [(0, '51.967')] -[2024-07-05 12:37:46,067][06021] Updated weights for policy 0, policy_version 25134 (0.0009) -[2024-07-05 12:37:48,275][06021] Updated weights for policy 0, policy_version 25144 (0.0010) -[2024-07-05 12:37:49,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37000.5, 300 sec: 37072.3). Total num frames: 186032128. Throughput: 0: 9245.8. Samples: 8904980. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:49,968][05794] Avg episode reward: [(0, '51.554')] -[2024-07-05 12:37:50,493][06021] Updated weights for policy 0, policy_version 25154 (0.0009) -[2024-07-05 12:37:52,775][06021] Updated weights for policy 0, policy_version 25164 (0.0010) -[2024-07-05 12:37:54,967][05794] Fps is (10 sec: 36863.5, 60 sec: 36864.0, 300 sec: 37073.4). Total num frames: 186212352. Throughput: 0: 9233.0. Samples: 8932012. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:54,968][05794] Avg episode reward: [(0, '52.506')] -[2024-07-05 12:37:55,004][06021] Updated weights for policy 0, policy_version 25174 (0.0010) -[2024-07-05 12:37:57,264][06021] Updated weights for policy 0, policy_version 25184 (0.0009) -[2024-07-05 12:37:59,434][06021] Updated weights for policy 0, policy_version 25194 (0.0011) -[2024-07-05 12:37:59,967][05794] Fps is (10 sec: 36863.8, 60 sec: 37000.5, 300 sec: 37072.3). Total num frames: 186400768. Throughput: 0: 9222.5. Samples: 8987216. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:37:59,968][05794] Avg episode reward: [(0, '50.885')] -[2024-07-05 12:38:01,670][06021] Updated weights for policy 0, policy_version 25204 (0.0012) -[2024-07-05 12:38:03,863][06021] Updated weights for policy 0, policy_version 25214 (0.0012) -[2024-07-05 12:38:04,967][05794] Fps is (10 sec: 37683.4, 60 sec: 37000.6, 300 sec: 37072.3). Total num frames: 186589184. Throughput: 0: 9219.6. Samples: 9043016. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:38:04,968][05794] Avg episode reward: [(0, '52.928')] -[2024-07-05 12:38:06,095][06021] Updated weights for policy 0, policy_version 25224 (0.0013) -[2024-07-05 12:38:08,337][06021] Updated weights for policy 0, policy_version 25234 (0.0010) -[2024-07-05 12:38:09,967][05794] Fps is (10 sec: 36864.6, 60 sec: 36864.2, 300 sec: 37044.5). Total num frames: 186769408. Throughput: 0: 9225.0. Samples: 9070920. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:38:09,968][05794] Avg episode reward: [(0, '52.846')] -[2024-07-05 12:38:10,526][06021] Updated weights for policy 0, policy_version 25244 (0.0009) -[2024-07-05 12:38:12,819][06021] Updated weights for policy 0, policy_version 25254 (0.0009) -[2024-07-05 12:38:14,975][05794] Fps is (10 sec: 36015.2, 60 sec: 36860.6, 300 sec: 37015.7). Total num frames: 186949632. Throughput: 0: 9209.9. Samples: 9125860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:38:14,990][05794] Avg episode reward: [(0, '52.029')] -[2024-07-05 12:38:15,040][06021] Updated weights for policy 0, policy_version 25264 (0.0016) -[2024-07-05 12:38:17,404][06021] Updated weights for policy 0, policy_version 25274 (0.0010) -[2024-07-05 12:38:19,616][06021] Updated weights for policy 0, policy_version 25284 (0.0013) -[2024-07-05 12:38:19,976][05794] Fps is (10 sec: 36015.2, 60 sec: 36722.5, 300 sec: 37015.7). Total num frames: 187129856. Throughput: 0: 9169.7. Samples: 9179456. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:38:19,992][05794] Avg episode reward: [(0, '50.730')] -[2024-07-05 12:38:21,799][06021] Updated weights for policy 0, policy_version 25294 (0.0013) -[2024-07-05 12:38:24,069][06021] Updated weights for policy 0, policy_version 25304 (0.0010) -[2024-07-05 12:38:24,967][05794] Fps is (10 sec: 36894.2, 60 sec: 36727.5, 300 sec: 37016.7). Total num frames: 187318272. Throughput: 0: 9174.6. Samples: 9207380. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:38:24,968][05794] Avg episode reward: [(0, '50.812')] -[2024-07-05 12:38:26,240][06021] Updated weights for policy 0, policy_version 25314 (0.0009) -[2024-07-05 12:38:28,519][06021] Updated weights for policy 0, policy_version 25324 (0.0012) -[2024-07-05 12:38:29,967][05794] Fps is (10 sec: 36894.1, 60 sec: 36727.5, 300 sec: 37016.7). Total num frames: 187498496. Throughput: 0: 9168.9. Samples: 9262208. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:38:29,968][05794] Avg episode reward: [(0, '49.851')] -[2024-07-05 12:38:30,689][06021] Updated weights for policy 0, policy_version 25334 (0.0009) -[2024-07-05 12:38:32,909][06021] Updated weights for policy 0, policy_version 25344 (0.0009) -[2024-07-05 12:38:34,967][05794] Fps is (10 sec: 36864.1, 60 sec: 36867.9, 300 sec: 37016.7). Total num frames: 187686912. Throughput: 0: 9186.1. Samples: 9318356. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:38:34,968][05794] Avg episode reward: [(0, '51.557')] -[2024-07-05 12:38:35,131][06021] Updated weights for policy 0, policy_version 25354 (0.0009) -[2024-07-05 12:38:37,348][06021] Updated weights for policy 0, policy_version 25364 (0.0009) -[2024-07-05 12:38:39,576][06021] Updated weights for policy 0, policy_version 25374 (0.0009) -[2024-07-05 12:38:39,968][05794] Fps is (10 sec: 36860.4, 60 sec: 36726.9, 300 sec: 37017.5). Total num frames: 187867136. Throughput: 0: 9194.5. Samples: 9345772. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:38:39,969][05794] Avg episode reward: [(0, '50.471')] -[2024-07-05 12:38:41,797][06021] Updated weights for policy 0, policy_version 25384 (0.0009) -[2024-07-05 12:38:43,990][06021] Updated weights for policy 0, policy_version 25394 (0.0009) -[2024-07-05 12:38:44,967][05794] Fps is (10 sec: 36864.2, 60 sec: 36864.0, 300 sec: 37016.7). Total num frames: 188055552. Throughput: 0: 9195.6. Samples: 9401016. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:38:44,968][05794] Avg episode reward: [(0, '53.784')] -[2024-07-05 12:38:46,226][06021] Updated weights for policy 0, policy_version 25404 (0.0009) -[2024-07-05 12:38:48,428][06021] Updated weights for policy 0, policy_version 25414 (0.0010) -[2024-07-05 12:38:49,967][05794] Fps is (10 sec: 36867.3, 60 sec: 36727.4, 300 sec: 36989.0). Total num frames: 188235776. Throughput: 0: 9187.3. Samples: 9456444. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:38:49,968][05794] Avg episode reward: [(0, '50.636')] -[2024-07-05 12:38:50,624][06021] Updated weights for policy 0, policy_version 25424 (0.0009) -[2024-07-05 12:38:53,025][06021] Updated weights for policy 0, policy_version 25434 (0.0014) -[2024-07-05 12:38:54,967][05794] Fps is (10 sec: 35225.3, 60 sec: 36590.9, 300 sec: 36962.0). Total num frames: 188407808. Throughput: 0: 9150.4. Samples: 9482688. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:38:54,968][05794] Avg episode reward: [(0, '50.181')] -[2024-07-05 12:38:55,495][06021] Updated weights for policy 0, policy_version 25444 (0.0010) -[2024-07-05 12:38:58,457][06021] Updated weights for policy 0, policy_version 25454 (0.0017) -[2024-07-05 12:38:59,967][05794] Fps is (10 sec: 31128.5, 60 sec: 35771.5, 300 sec: 36794.5). Total num frames: 188547072. Throughput: 0: 8938.7. Samples: 9528032. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:38:59,970][05794] Avg episode reward: [(0, '49.573')] -[2024-07-05 12:39:01,484][06021] Updated weights for policy 0, policy_version 25464 (0.0024) -[2024-07-05 12:39:04,980][05794] Fps is (10 sec: 22095.1, 60 sec: 33990.8, 300 sec: 36432.3). Total num frames: 188628992. Throughput: 0: 8193.7. Samples: 9548192. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:39:05,076][05794] Avg episode reward: [(0, '49.221')] -[2024-07-05 12:39:09,977][05794] Fps is (10 sec: 8185.5, 60 sec: 30988.7, 300 sec: 35821.6). Total num frames: 188628992. Throughput: 0: 7736.6. Samples: 9555592. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:39:10,043][05794] Avg episode reward: [(0, '49.221')] -[2024-07-05 12:39:14,973][05794] Fps is (10 sec: 819.7, 60 sec: 28127.4, 300 sec: 35211.1). Total num frames: 188637184. Throughput: 0: 6547.2. Samples: 9556864. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:39:15,017][05794] Avg episode reward: [(0, '49.587')] -[2024-07-05 12:39:19,977][05794] Fps is (10 sec: 819.2, 60 sec: 25122.1, 300 sec: 34572.1). Total num frames: 188637184. Throughput: 0: 5311.2. Samples: 9557404. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:39:20,031][05794] Avg episode reward: [(0, '49.213')] -[2024-07-05 12:39:24,983][05794] Fps is (10 sec: 818.4, 60 sec: 22113.1, 300 sec: 33988.2). Total num frames: 188645376. Throughput: 0: 4728.7. Samples: 9558628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:39:25,047][05794] Avg episode reward: [(0, '49.758')] -[2024-07-05 12:39:29,981][05794] Fps is (10 sec: 818.9, 60 sec: 19110.8, 300 sec: 33349.8). Total num frames: 188645376. Throughput: 0: 3525.4. Samples: 9559700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:39:30,080][05794] Avg episode reward: [(0, '49.363')] -[2024-07-05 12:39:34,978][05794] Fps is (10 sec: 819.6, 60 sec: 16108.5, 300 sec: 32767.8). Total num frames: 188653568. Throughput: 0: 2306.2. Samples: 9560244. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:39:35,042][05794] Avg episode reward: [(0, '49.787')] -[2024-07-05 12:39:35,578][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000025471_188653568.pth... -[2024-07-05 12:39:39,152][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000024541_181035008.pth -[2024-07-05 12:39:39,981][05794] Fps is (10 sec: 819.2, 60 sec: 13104.9, 300 sec: 32129.0). Total num frames: 188653568. Throughput: 0: 1742.4. Samples: 9561116. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:39:40,054][05794] Avg episode reward: [(0, '49.366')] -[2024-07-05 12:39:44,978][05794] Fps is (10 sec: 0.0, 60 sec: 9965.5, 300 sec: 31489.7). Total num frames: 188653568. Throughput: 0: 758.9. Samples: 9562188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:39:45,043][05794] Avg episode reward: [(0, '49.470')] -[2024-07-05 12:39:49,990][05794] Fps is (10 sec: 818.6, 60 sec: 7097.6, 300 sec: 30877.7). Total num frames: 188661760. Throughput: 0: 321.2. Samples: 9562648. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:39:50,125][05794] Avg episode reward: [(0, '49.331')] -[2024-07-05 12:39:54,979][05794] Fps is (10 sec: 1638.3, 60 sec: 4368.4, 300 sec: 30295.6). Total num frames: 188669952. Throughput: 0: 181.4. Samples: 9563756. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:39:55,047][05794] Avg episode reward: [(0, '48.765')] -[2024-07-05 12:39:59,976][05794] Fps is (10 sec: 820.1, 60 sec: 2047.8, 300 sec: 29657.0). Total num frames: 188669952. Throughput: 0: 176.8. Samples: 9564820. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:40:00,040][05794] Avg episode reward: [(0, '48.505')] -[2024-07-05 12:40:01,543][06021] Updated weights for policy 0, policy_version 25474 (0.0299) -[2024-07-05 12:40:04,568][06021] Updated weights for policy 0, policy_version 25484 (0.0015) -[2024-07-05 12:40:04,967][05794] Fps is (10 sec: 9839.7, 60 sec: 2321.5, 300 sec: 29352.3). Total num frames: 188768256. Throughput: 0: 695.3. Samples: 9588688. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:40:04,968][05794] Avg episode reward: [(0, '51.241')] -[2024-07-05 12:40:07,455][06021] Updated weights for policy 0, policy_version 25494 (0.0017) -[2024-07-05 12:40:09,967][05794] Fps is (10 sec: 24594.7, 60 sec: 4779.3, 300 sec: 29241.3). Total num frames: 188915712. Throughput: 0: 1145.5. Samples: 9610160. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:40:09,968][05794] Avg episode reward: [(0, '51.318')] -[2024-07-05 12:40:10,163][06021] Updated weights for policy 0, policy_version 25504 (0.0012) -[2024-07-05 12:40:12,571][06021] Updated weights for policy 0, policy_version 25514 (0.0013) -[2024-07-05 12:40:14,813][06021] Updated weights for policy 0, policy_version 25524 (0.0009) -[2024-07-05 12:40:14,967][05794] Fps is (10 sec: 31949.4, 60 sec: 7510.0, 300 sec: 29185.7). Total num frames: 189087744. Throughput: 0: 2214.7. Samples: 9659336. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:40:14,968][05794] Avg episode reward: [(0, '50.734')] -[2024-07-05 12:40:16,967][06021] Updated weights for policy 0, policy_version 25534 (0.0008) -[2024-07-05 12:40:19,136][06021] Updated weights for policy 0, policy_version 25544 (0.0011) -[2024-07-05 12:40:19,967][05794] Fps is (10 sec: 36045.3, 60 sec: 10651.1, 300 sec: 29185.7). Total num frames: 189276160. Throughput: 0: 3470.5. Samples: 9716384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:40:19,968][05794] Avg episode reward: [(0, '52.363')] -[2024-07-05 12:40:21,295][06021] Updated weights for policy 0, policy_version 25554 (0.0009) -[2024-07-05 12:40:23,494][06021] Updated weights for policy 0, policy_version 25564 (0.0009) -[2024-07-05 12:40:24,967][05794] Fps is (10 sec: 37682.8, 60 sec: 13656.6, 300 sec: 29213.5). Total num frames: 189464576. Throughput: 0: 4080.2. Samples: 9744676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:40:24,968][05794] Avg episode reward: [(0, '52.215')] -[2024-07-05 12:40:25,716][06021] Updated weights for policy 0, policy_version 25574 (0.0015) -[2024-07-05 12:40:27,894][06021] Updated weights for policy 0, policy_version 25584 (0.0010) -[2024-07-05 12:40:29,980][05794] Fps is (10 sec: 37640.7, 60 sec: 16793.8, 300 sec: 29212.4). Total num frames: 189652992. Throughput: 0: 5305.2. Samples: 9800936. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:40:29,992][05794] Avg episode reward: [(0, '50.902')] -[2024-07-05 12:40:30,033][06021] Updated weights for policy 0, policy_version 25594 (0.0009) -[2024-07-05 12:40:32,277][06021] Updated weights for policy 0, policy_version 25604 (0.0009) -[2024-07-05 12:40:34,488][06021] Updated weights for policy 0, policy_version 25614 (0.0009) -[2024-07-05 12:40:34,967][05794] Fps is (10 sec: 37683.4, 60 sec: 19800.3, 300 sec: 29242.0). Total num frames: 189841408. Throughput: 0: 6535.0. Samples: 9856604. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:40:34,968][05794] Avg episode reward: [(0, '54.787')] -[2024-07-05 12:40:34,973][06001] Saving new best policy, reward=54.787! -[2024-07-05 12:40:36,667][06021] Updated weights for policy 0, policy_version 25624 (0.0009) -[2024-07-05 12:40:38,918][06021] Updated weights for policy 0, policy_version 25634 (0.0010) -[2024-07-05 12:40:39,967][05794] Fps is (10 sec: 36905.3, 60 sec: 22805.5, 300 sec: 29213.5). Total num frames: 190021632. Throughput: 0: 7125.0. Samples: 9884312. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:40:39,968][05794] Avg episode reward: [(0, '50.586')] -[2024-07-05 12:40:41,104][06021] Updated weights for policy 0, policy_version 25644 (0.0010) -[2024-07-05 12:40:43,327][06021] Updated weights for policy 0, policy_version 25654 (0.0010) -[2024-07-05 12:40:44,967][05794] Fps is (10 sec: 36863.7, 60 sec: 25945.1, 300 sec: 29213.5). Total num frames: 190210048. Throughput: 0: 8332.3. Samples: 9939712. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:40:44,968][05794] Avg episode reward: [(0, '52.418')] -[2024-07-05 12:40:45,492][06021] Updated weights for policy 0, policy_version 25664 (0.0009) -[2024-07-05 12:40:47,707][06021] Updated weights for policy 0, policy_version 25674 (0.0009) -[2024-07-05 12:40:49,876][06021] Updated weights for policy 0, policy_version 25684 (0.0009) -[2024-07-05 12:40:49,967][05794] Fps is (10 sec: 37683.4, 60 sec: 28954.0, 300 sec: 29241.3). Total num frames: 190398464. Throughput: 0: 9055.1. Samples: 9996164. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:40:49,968][05794] Avg episode reward: [(0, '51.364')] -[2024-07-05 12:40:52,095][06021] Updated weights for policy 0, policy_version 25694 (0.0012) -[2024-07-05 12:40:54,311][06021] Updated weights for policy 0, policy_version 25704 (0.0009) -[2024-07-05 12:40:54,967][05794] Fps is (10 sec: 36863.8, 60 sec: 31817.3, 300 sec: 29213.5). Total num frames: 190578688. Throughput: 0: 9191.1. Samples: 10023760. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 12:40:54,968][05794] Avg episode reward: [(0, '50.353')] -[2024-07-05 12:40:56,518][06021] Updated weights for policy 0, policy_version 25714 (0.0010) -[2024-07-05 12:40:58,759][06021] Updated weights for policy 0, policy_version 25724 (0.0010) -[2024-07-05 12:40:59,967][05794] Fps is (10 sec: 36863.9, 60 sec: 34957.0, 300 sec: 29213.5). Total num frames: 190767104. Throughput: 0: 9323.7. Samples: 10078904. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 12:40:59,968][05794] Avg episode reward: [(0, '52.174')] -[2024-07-05 12:41:01,026][06021] Updated weights for policy 0, policy_version 25734 (0.0013) -[2024-07-05 12:41:03,258][06021] Updated weights for policy 0, policy_version 25744 (0.0013) -[2024-07-05 12:41:04,976][05794] Fps is (10 sec: 36828.8, 60 sec: 36312.1, 300 sec: 29184.8). Total num frames: 190947328. Throughput: 0: 9286.5. Samples: 10134368. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 12:41:04,977][05794] Avg episode reward: [(0, '49.978')] -[2024-07-05 12:41:05,434][06021] Updated weights for policy 0, policy_version 25754 (0.0012) -[2024-07-05 12:41:07,692][06021] Updated weights for policy 0, policy_version 25764 (0.0009) -[2024-07-05 12:41:09,899][06021] Updated weights for policy 0, policy_version 25774 (0.0009) -[2024-07-05 12:41:09,967][05794] Fps is (10 sec: 36863.9, 60 sec: 37000.5, 300 sec: 29213.5). Total num frames: 191135744. Throughput: 0: 9266.2. Samples: 10161652. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 12:41:09,968][05794] Avg episode reward: [(0, '52.893')] -[2024-07-05 12:41:12,126][06021] Updated weights for policy 0, policy_version 25784 (0.0009) -[2024-07-05 12:41:14,414][06021] Updated weights for policy 0, policy_version 25794 (0.0010) -[2024-07-05 12:41:14,967][05794] Fps is (10 sec: 36899.5, 60 sec: 37137.0, 300 sec: 29185.7). Total num frames: 191315968. Throughput: 0: 9250.3. Samples: 10217096. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:41:14,968][05794] Avg episode reward: [(0, '51.323')] -[2024-07-05 12:41:16,610][06021] Updated weights for policy 0, policy_version 25804 (0.0009) -[2024-07-05 12:41:18,837][06021] Updated weights for policy 0, policy_version 25814 (0.0009) -[2024-07-05 12:41:19,967][05794] Fps is (10 sec: 36864.0, 60 sec: 37137.0, 300 sec: 29213.7). Total num frames: 191504384. Throughput: 0: 9234.4. Samples: 10272152. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:41:19,968][05794] Avg episode reward: [(0, '51.840')] -[2024-07-05 12:41:21,057][06021] Updated weights for policy 0, policy_version 25824 (0.0009) -[2024-07-05 12:41:23,317][06021] Updated weights for policy 0, policy_version 25834 (0.0009) -[2024-07-05 12:41:24,967][05794] Fps is (10 sec: 36864.2, 60 sec: 37000.6, 300 sec: 29185.7). Total num frames: 191684608. Throughput: 0: 9224.9. Samples: 10299432. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:41:24,968][05794] Avg episode reward: [(0, '47.439')] -[2024-07-05 12:41:25,587][06021] Updated weights for policy 0, policy_version 25844 (0.0009) -[2024-07-05 12:41:27,856][06021] Updated weights for policy 0, policy_version 25854 (0.0010) -[2024-07-05 12:41:29,967][05794] Fps is (10 sec: 36044.8, 60 sec: 36870.9, 300 sec: 29185.8). Total num frames: 191864832. Throughput: 0: 9205.7. Samples: 10353968. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:41:29,968][05794] Avg episode reward: [(0, '49.486')] -[2024-07-05 12:41:30,050][06021] Updated weights for policy 0, policy_version 25864 (0.0009) -[2024-07-05 12:41:32,271][06021] Updated weights for policy 0, policy_version 25874 (0.0010) -[2024-07-05 12:41:34,523][06021] Updated weights for policy 0, policy_version 25884 (0.0009) -[2024-07-05 12:41:34,967][05794] Fps is (10 sec: 36864.1, 60 sec: 36864.0, 300 sec: 29185.7). Total num frames: 192053248. Throughput: 0: 9176.5. Samples: 10409108. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:41:34,968][05794] Avg episode reward: [(0, '50.545')] -[2024-07-05 12:41:34,972][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000025886_192053248.pth... -[2024-07-05 12:41:35,056][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000025084_185483264.pth -[2024-07-05 12:41:36,691][06021] Updated weights for policy 0, policy_version 25894 (0.0010) -[2024-07-05 12:41:38,948][06021] Updated weights for policy 0, policy_version 25904 (0.0013) -[2024-07-05 12:41:39,967][05794] Fps is (10 sec: 36863.9, 60 sec: 36864.0, 300 sec: 29158.0). Total num frames: 192233472. Throughput: 0: 9189.5. Samples: 10437284. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:41:39,968][05794] Avg episode reward: [(0, '50.809')] -[2024-07-05 12:41:41,174][06021] Updated weights for policy 0, policy_version 25914 (0.0014) -[2024-07-05 12:41:43,429][06021] Updated weights for policy 0, policy_version 25924 (0.0009) -[2024-07-05 12:41:44,968][05794] Fps is (10 sec: 36860.1, 60 sec: 36863.4, 300 sec: 29185.6). Total num frames: 192421888. Throughput: 0: 9178.9. Samples: 10491964. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:41:44,970][05794] Avg episode reward: [(0, '52.263')] -[2024-07-05 12:41:45,664][06021] Updated weights for policy 0, policy_version 25934 (0.0009) -[2024-07-05 12:41:47,931][06021] Updated weights for policy 0, policy_version 25944 (0.0012) -[2024-07-05 12:41:49,967][05794] Fps is (10 sec: 36863.5, 60 sec: 36727.3, 300 sec: 29158.0). Total num frames: 192602112. Throughput: 0: 9173.3. Samples: 10547080. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:41:49,968][05794] Avg episode reward: [(0, '48.947')] -[2024-07-05 12:41:50,123][06021] Updated weights for policy 0, policy_version 25954 (0.0010) -[2024-07-05 12:41:52,348][06021] Updated weights for policy 0, policy_version 25964 (0.0010) -[2024-07-05 12:41:54,549][06021] Updated weights for policy 0, policy_version 25974 (0.0012) -[2024-07-05 12:41:54,967][05794] Fps is (10 sec: 36047.0, 60 sec: 36727.3, 300 sec: 29157.9). Total num frames: 192782336. Throughput: 0: 9166.7. Samples: 10574156. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:41:54,969][05794] Avg episode reward: [(0, '51.531')] -[2024-07-05 12:41:56,796][06021] Updated weights for policy 0, policy_version 25984 (0.0013) -[2024-07-05 12:41:59,049][06021] Updated weights for policy 0, policy_version 25994 (0.0011) -[2024-07-05 12:41:59,967][05794] Fps is (10 sec: 36043.1, 60 sec: 36590.5, 300 sec: 29130.1). Total num frames: 192962560. Throughput: 0: 9161.9. Samples: 10629388. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:41:59,968][05794] Avg episode reward: [(0, '49.021')] -[2024-07-05 12:42:01,318][06021] Updated weights for policy 0, policy_version 26004 (0.0011) -[2024-07-05 12:42:03,502][06021] Updated weights for policy 0, policy_version 26014 (0.0012) -[2024-07-05 12:42:04,967][05794] Fps is (10 sec: 36865.7, 60 sec: 36733.4, 300 sec: 29130.2). Total num frames: 193150976. Throughput: 0: 9164.2. Samples: 10684540. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:42:04,968][05794] Avg episode reward: [(0, '52.003')] -[2024-07-05 12:42:05,756][06021] Updated weights for policy 0, policy_version 26024 (0.0009) -[2024-07-05 12:42:07,954][06021] Updated weights for policy 0, policy_version 26034 (0.0010) -[2024-07-05 12:42:09,967][05794] Fps is (10 sec: 37685.6, 60 sec: 36727.5, 300 sec: 29158.2). Total num frames: 193339392. Throughput: 0: 9168.2. Samples: 10712000. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:42:09,968][05794] Avg episode reward: [(0, '50.082')] -[2024-07-05 12:42:10,151][06021] Updated weights for policy 0, policy_version 26044 (0.0011) -[2024-07-05 12:42:12,414][06021] Updated weights for policy 0, policy_version 26054 (0.0010) -[2024-07-05 12:42:14,659][06021] Updated weights for policy 0, policy_version 26064 (0.0011) -[2024-07-05 12:42:14,967][05794] Fps is (10 sec: 36863.9, 60 sec: 36727.5, 300 sec: 29130.2). Total num frames: 193519616. Throughput: 0: 9190.8. Samples: 10767556. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:42:14,968][05794] Avg episode reward: [(0, '50.724')] -[2024-07-05 12:42:16,810][06021] Updated weights for policy 0, policy_version 26074 (0.0009) -[2024-07-05 12:42:19,076][06021] Updated weights for policy 0, policy_version 26084 (0.0009) -[2024-07-05 12:42:19,967][05794] Fps is (10 sec: 36864.1, 60 sec: 36727.5, 300 sec: 29130.2). Total num frames: 193708032. Throughput: 0: 9202.1. Samples: 10823204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:42:19,968][05794] Avg episode reward: [(0, '50.911')] -[2024-07-05 12:42:21,257][06021] Updated weights for policy 0, policy_version 26094 (0.0009) -[2024-07-05 12:42:23,456][06021] Updated weights for policy 0, policy_version 26104 (0.0010) -[2024-07-05 12:42:24,967][05794] Fps is (10 sec: 36864.2, 60 sec: 36727.5, 300 sec: 29130.2). Total num frames: 193888256. Throughput: 0: 9191.0. Samples: 10850880. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:42:24,968][05794] Avg episode reward: [(0, '52.125')] -[2024-07-05 12:42:25,724][06021] Updated weights for policy 0, policy_version 26114 (0.0014) -[2024-07-05 12:42:28,018][06021] Updated weights for policy 0, policy_version 26124 (0.0013) -[2024-07-05 12:42:29,967][05794] Fps is (10 sec: 36044.6, 60 sec: 36727.4, 300 sec: 29130.8). Total num frames: 194068480. Throughput: 0: 9188.7. Samples: 10905444. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:42:29,968][05794] Avg episode reward: [(0, '50.610')] -[2024-07-05 12:42:30,181][06021] Updated weights for policy 0, policy_version 26134 (0.0012) -[2024-07-05 12:42:32,413][06021] Updated weights for policy 0, policy_version 26144 (0.0010) -[2024-07-05 12:42:34,646][06021] Updated weights for policy 0, policy_version 26154 (0.0009) -[2024-07-05 12:42:34,967][05794] Fps is (10 sec: 36863.9, 60 sec: 36727.5, 300 sec: 29130.2). Total num frames: 194256896. Throughput: 0: 9198.1. Samples: 10960992. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:42:34,968][05794] Avg episode reward: [(0, '49.582')] -[2024-07-05 12:42:36,891][06021] Updated weights for policy 0, policy_version 26164 (0.0014) -[2024-07-05 12:42:39,158][06021] Updated weights for policy 0, policy_version 26174 (0.0012) -[2024-07-05 12:42:39,977][05794] Fps is (10 sec: 36833.8, 60 sec: 36722.4, 300 sec: 29129.4). Total num frames: 194437120. Throughput: 0: 9204.3. Samples: 10988420. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:42:39,980][05794] Avg episode reward: [(0, '52.993')] -[2024-07-05 12:42:41,375][06021] Updated weights for policy 0, policy_version 26184 (0.0011) -[2024-07-05 12:42:43,570][06021] Updated weights for policy 0, policy_version 26194 (0.0009) -[2024-07-05 12:42:44,967][05794] Fps is (10 sec: 36864.0, 60 sec: 36728.1, 300 sec: 29130.2). Total num frames: 194625536. Throughput: 0: 9202.8. Samples: 11043508. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:42:44,968][05794] Avg episode reward: [(0, '52.487')] -[2024-07-05 12:42:45,798][06021] Updated weights for policy 0, policy_version 26204 (0.0009) -[2024-07-05 12:42:48,049][06021] Updated weights for policy 0, policy_version 26214 (0.0010) -[2024-07-05 12:42:49,967][05794] Fps is (10 sec: 36894.1, 60 sec: 36727.5, 300 sec: 29130.2). Total num frames: 194805760. Throughput: 0: 9206.9. Samples: 11098852. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:42:49,968][05794] Avg episode reward: [(0, '52.899')] -[2024-07-05 12:42:50,207][06021] Updated weights for policy 0, policy_version 26224 (0.0010) -[2024-07-05 12:42:52,436][06021] Updated weights for policy 0, policy_version 26234 (0.0014) -[2024-07-05 12:42:54,670][06021] Updated weights for policy 0, policy_version 26244 (0.0009) -[2024-07-05 12:42:54,967][05794] Fps is (10 sec: 36864.0, 60 sec: 36864.3, 300 sec: 29130.2). Total num frames: 194994176. Throughput: 0: 9216.2. Samples: 11126728. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:42:54,968][05794] Avg episode reward: [(0, '51.102')] -[2024-07-05 12:42:56,942][06021] Updated weights for policy 0, policy_version 26254 (0.0012) -[2024-07-05 12:42:59,190][06021] Updated weights for policy 0, policy_version 26264 (0.0012) -[2024-07-05 12:42:59,967][05794] Fps is (10 sec: 36863.7, 60 sec: 36864.3, 300 sec: 29102.4). Total num frames: 195174400. Throughput: 0: 9203.1. Samples: 11181696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:42:59,968][05794] Avg episode reward: [(0, '51.667')] -[2024-07-05 12:43:01,509][06021] Updated weights for policy 0, policy_version 26274 (0.0009) -[2024-07-05 12:43:03,914][06021] Updated weights for policy 0, policy_version 26284 (0.0010) -[2024-07-05 12:43:04,967][05794] Fps is (10 sec: 35225.2, 60 sec: 36590.9, 300 sec: 29074.6). Total num frames: 195346432. Throughput: 0: 9126.6. Samples: 11233904. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:43:04,968][05794] Avg episode reward: [(0, '52.547')] -[2024-07-05 12:43:06,613][06021] Updated weights for policy 0, policy_version 26294 (0.0012) -[2024-07-05 12:43:09,527][06021] Updated weights for policy 0, policy_version 26304 (0.0016) -[2024-07-05 12:43:09,967][05794] Fps is (10 sec: 31129.8, 60 sec: 35771.7, 300 sec: 28936.6). Total num frames: 195485696. Throughput: 0: 8981.9. Samples: 11255068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:43:09,968][05794] Avg episode reward: [(0, '52.193')] -[2024-07-05 12:43:14,669][06021] Updated weights for policy 0, policy_version 26314 (0.0057) -[2024-07-05 12:43:14,979][05794] Fps is (10 sec: 21276.5, 60 sec: 33990.7, 300 sec: 28574.5). Total num frames: 195559424. Throughput: 0: 8468.2. Samples: 11286604. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:43:15,043][05794] Avg episode reward: [(0, '52.013')] -[2024-07-05 12:43:19,979][05794] Fps is (10 sec: 7365.5, 60 sec: 30851.4, 300 sec: 27935.2). Total num frames: 195559424. Throughput: 0: 7248.7. Samples: 11287256. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:43:20,067][05794] Avg episode reward: [(0, '51.920')] -[2024-07-05 12:43:24,982][05794] Fps is (10 sec: 0.0, 60 sec: 27847.9, 300 sec: 27324.2). Total num frames: 195559424. Throughput: 0: 6669.5. Samples: 11288564. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:43:25,060][05794] Avg episode reward: [(0, '51.890')] -[2024-07-05 12:43:29,976][05794] Fps is (10 sec: 819.4, 60 sec: 24982.6, 300 sec: 26713.6). Total num frames: 195567616. Throughput: 0: 5469.3. Samples: 11289664. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:43:30,041][05794] Avg episode reward: [(0, '51.754')] -[2024-07-05 12:43:34,980][05794] Fps is (10 sec: 819.2, 60 sec: 21841.3, 300 sec: 26102.4). Total num frames: 195567616. Throughput: 0: 4253.4. Samples: 11290300. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:43:35,039][05794] Avg episode reward: [(0, '52.126')] -[2024-07-05 12:43:36,146][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026316_195575808.pth... -[2024-07-05 12:43:39,410][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000025471_188653568.pth -[2024-07-05 12:43:39,978][05794] Fps is (10 sec: 819.1, 60 sec: 18978.0, 300 sec: 25491.6). Total num frames: 195575808. Throughput: 0: 3660.4. Samples: 11291480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:43:40,044][05794] Avg episode reward: [(0, '52.126')] -[2024-07-05 12:43:44,977][05794] Fps is (10 sec: 819.4, 60 sec: 15835.5, 300 sec: 24880.7). Total num frames: 195575808. Throughput: 0: 2460.0. Samples: 11292416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:43:45,022][05794] Avg episode reward: [(0, '52.126')] -[2024-07-05 12:43:49,978][05794] Fps is (10 sec: 819.2, 60 sec: 12968.7, 300 sec: 24325.3). Total num frames: 195584000. Throughput: 0: 1314.6. Samples: 11293072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:43:50,029][05794] Avg episode reward: [(0, '52.311')] -[2024-07-05 12:43:54,981][05794] Fps is (10 sec: 819.2, 60 sec: 9828.9, 300 sec: 23853.3). Total num frames: 195584000. Throughput: 0: 868.0. Samples: 11294136. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:43:55,034][05794] Avg episode reward: [(0, '52.311')] -[2024-07-05 12:43:59,975][05794] Fps is (10 sec: 819.5, 60 sec: 6962.6, 300 sec: 23604.5). Total num frames: 195592192. Throughput: 0: 193.6. Samples: 11295316. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:44:00,035][05794] Avg episode reward: [(0, '52.483')] -[2024-07-05 12:44:04,981][05794] Fps is (10 sec: 819.2, 60 sec: 4095.4, 300 sec: 23604.0). Total num frames: 195592192. Throughput: 0: 191.1. Samples: 11295856. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:44:05,040][05794] Avg episode reward: [(0, '52.503')] -[2024-07-05 12:44:09,981][05794] Fps is (10 sec: 0.0, 60 sec: 1774.6, 300 sec: 23575.8). Total num frames: 195592192. Throughput: 0: 187.0. Samples: 11296980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:44:10,059][05794] Avg episode reward: [(0, '52.347')] -[2024-07-05 12:44:14,986][05794] Fps is (10 sec: 818.5, 60 sec: 682.6, 300 sec: 23603.3). Total num frames: 195600384. Throughput: 0: 187.1. Samples: 11298084. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 12:44:15,053][05794] Avg episode reward: [(0, '52.712')] -[2024-07-05 12:44:19,967][05794] Fps is (10 sec: 4100.6, 60 sec: 1229.0, 300 sec: 23688.5). Total num frames: 195633152. Throughput: 0: 337.8. Samples: 11305496. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:44:19,969][05794] Avg episode reward: [(0, '51.887')] -[2024-07-05 12:44:20,071][06021] Updated weights for policy 0, policy_version 26324 (0.0212) -[2024-07-05 12:44:23,153][06021] Updated weights for policy 0, policy_version 26334 (0.0019) -[2024-07-05 12:44:24,967][05794] Fps is (10 sec: 17232.6, 60 sec: 3550.5, 300 sec: 24160.4). Total num frames: 195772416. Throughput: 0: 756.5. Samples: 11325516. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:44:24,969][05794] Avg episode reward: [(0, '51.434')] -[2024-07-05 12:44:26,044][06021] Updated weights for policy 0, policy_version 26344 (0.0013) -[2024-07-05 12:44:28,555][06021] Updated weights for policy 0, policy_version 26354 (0.0011) -[2024-07-05 12:44:29,968][05794] Fps is (10 sec: 29489.6, 60 sec: 6008.1, 300 sec: 24660.0). Total num frames: 195928064. Throughput: 0: 1749.8. Samples: 11371144. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:44:29,969][05794] Avg episode reward: [(0, '49.212')] -[2024-07-05 12:44:30,979][06021] Updated weights for policy 0, policy_version 26364 (0.0010) -[2024-07-05 12:44:33,140][06021] Updated weights for policy 0, policy_version 26374 (0.0008) -[2024-07-05 12:44:34,967][05794] Fps is (10 sec: 34406.4, 60 sec: 9149.4, 300 sec: 25299.0). Total num frames: 196116480. Throughput: 0: 2949.5. Samples: 11425772. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 12:44:34,968][05794] Avg episode reward: [(0, '52.031')] -[2024-07-05 12:44:35,284][06021] Updated weights for policy 0, policy_version 26384 (0.0008) -[2024-07-05 12:44:37,447][06021] Updated weights for policy 0, policy_version 26394 (0.0008) -[2024-07-05 12:44:39,638][06021] Updated weights for policy 0, policy_version 26404 (0.0012) -[2024-07-05 12:44:39,967][05794] Fps is (10 sec: 37687.4, 60 sec: 12153.2, 300 sec: 25937.5). Total num frames: 196304896. Throughput: 0: 3565.3. Samples: 11454540. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:44:39,968][05794] Avg episode reward: [(0, '52.053')] -[2024-07-05 12:44:41,821][06021] Updated weights for policy 0, policy_version 26414 (0.0009) -[2024-07-05 12:44:44,032][06021] Updated weights for policy 0, policy_version 26424 (0.0011) -[2024-07-05 12:44:44,967][05794] Fps is (10 sec: 37683.5, 60 sec: 15294.0, 300 sec: 26549.3). Total num frames: 196493312. Throughput: 0: 4772.1. Samples: 11510032. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:44:44,968][05794] Avg episode reward: [(0, '51.802')] -[2024-07-05 12:44:46,252][06021] Updated weights for policy 0, policy_version 26434 (0.0009) -[2024-07-05 12:44:48,492][06021] Updated weights for policy 0, policy_version 26444 (0.0009) -[2024-07-05 12:44:49,973][05794] Fps is (10 sec: 36839.3, 60 sec: 18159.7, 300 sec: 27131.1). Total num frames: 196673536. Throughput: 0: 5988.1. Samples: 11565308. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:44:49,975][05794] Avg episode reward: [(0, '53.316')] -[2024-07-05 12:44:50,719][06021] Updated weights for policy 0, policy_version 26454 (0.0009) -[2024-07-05 12:44:52,928][06021] Updated weights for policy 0, policy_version 26464 (0.0009) -[2024-07-05 12:44:54,967][05794] Fps is (10 sec: 36864.0, 60 sec: 21302.4, 300 sec: 27770.2). Total num frames: 196861952. Throughput: 0: 6583.5. Samples: 11593160. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:44:54,968][05794] Avg episode reward: [(0, '51.694')] -[2024-07-05 12:44:55,152][06021] Updated weights for policy 0, policy_version 26474 (0.0009) -[2024-07-05 12:44:57,355][06021] Updated weights for policy 0, policy_version 26484 (0.0009) -[2024-07-05 12:44:59,583][06021] Updated weights for policy 0, policy_version 26494 (0.0009) -[2024-07-05 12:44:59,969][05794] Fps is (10 sec: 36880.1, 60 sec: 24167.8, 300 sec: 28047.0). Total num frames: 197042176. Throughput: 0: 7792.1. Samples: 11648612. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:44:59,980][05794] Avg episode reward: [(0, '50.762')] -[2024-07-05 12:45:01,782][06021] Updated weights for policy 0, policy_version 26504 (0.0011) -[2024-07-05 12:45:04,065][06021] Updated weights for policy 0, policy_version 26514 (0.0010) -[2024-07-05 12:45:04,967][05794] Fps is (10 sec: 36864.2, 60 sec: 27310.8, 300 sec: 28186.0). Total num frames: 197230592. Throughput: 0: 8855.7. Samples: 11703996. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:04,968][05794] Avg episode reward: [(0, '53.949')] -[2024-07-05 12:45:06,277][06021] Updated weights for policy 0, policy_version 26524 (0.0010) -[2024-07-05 12:45:08,471][06021] Updated weights for policy 0, policy_version 26534 (0.0010) -[2024-07-05 12:45:09,967][05794] Fps is (10 sec: 36872.3, 60 sec: 30316.3, 300 sec: 28213.8). Total num frames: 197410816. Throughput: 0: 9020.2. Samples: 11731424. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:09,968][05794] Avg episode reward: [(0, '52.455')] -[2024-07-05 12:45:10,703][06021] Updated weights for policy 0, policy_version 26544 (0.0012) -[2024-07-05 12:45:12,924][06021] Updated weights for policy 0, policy_version 26554 (0.0010) -[2024-07-05 12:45:14,967][05794] Fps is (10 sec: 36863.7, 60 sec: 33323.6, 300 sec: 28213.8). Total num frames: 197599232. Throughput: 0: 9233.1. Samples: 11786624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:45:14,968][05794] Avg episode reward: [(0, '50.883')] -[2024-07-05 12:45:15,205][06021] Updated weights for policy 0, policy_version 26564 (0.0012) -[2024-07-05 12:45:17,404][06021] Updated weights for policy 0, policy_version 26574 (0.0009) -[2024-07-05 12:45:19,677][06021] Updated weights for policy 0, policy_version 26584 (0.0012) -[2024-07-05 12:45:19,967][05794] Fps is (10 sec: 36864.6, 60 sec: 35772.1, 300 sec: 28186.0). Total num frames: 197779456. Throughput: 0: 9233.5. Samples: 11841280. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:45:19,968][05794] Avg episode reward: [(0, '52.064')] -[2024-07-05 12:45:21,892][06021] Updated weights for policy 0, policy_version 26594 (0.0010) -[2024-07-05 12:45:24,109][06021] Updated weights for policy 0, policy_version 26604 (0.0009) -[2024-07-05 12:45:24,967][05794] Fps is (10 sec: 36043.1, 60 sec: 36454.1, 300 sec: 28159.3). Total num frames: 197959680. Throughput: 0: 9219.9. Samples: 11869440. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:45:24,968][05794] Avg episode reward: [(0, '53.228')] -[2024-07-05 12:45:26,274][06021] Updated weights for policy 0, policy_version 26614 (0.0010) -[2024-07-05 12:45:28,541][06021] Updated weights for policy 0, policy_version 26624 (0.0010) -[2024-07-05 12:45:29,967][05794] Fps is (10 sec: 36863.5, 60 sec: 37001.2, 300 sec: 28158.3). Total num frames: 198148096. Throughput: 0: 9204.4. Samples: 11924232. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:29,968][05794] Avg episode reward: [(0, '53.953')] -[2024-07-05 12:45:30,743][06021] Updated weights for policy 0, policy_version 26634 (0.0009) -[2024-07-05 12:45:32,966][06021] Updated weights for policy 0, policy_version 26644 (0.0009) -[2024-07-05 12:45:34,967][05794] Fps is (10 sec: 36864.8, 60 sec: 36863.9, 300 sec: 28158.2). Total num frames: 198328320. Throughput: 0: 9209.8. Samples: 11979688. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:34,969][05794] Avg episode reward: [(0, '51.081')] -[2024-07-05 12:45:35,013][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026653_198336512.pth... -[2024-07-05 12:45:35,100][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000025886_192053248.pth -[2024-07-05 12:45:35,203][06021] Updated weights for policy 0, policy_version 26654 (0.0009) -[2024-07-05 12:45:37,499][06021] Updated weights for policy 0, policy_version 26664 (0.0009) -[2024-07-05 12:45:39,735][06021] Updated weights for policy 0, policy_version 26674 (0.0010) -[2024-07-05 12:45:39,967][05794] Fps is (10 sec: 36864.4, 60 sec: 36864.0, 300 sec: 28158.3). Total num frames: 198516736. Throughput: 0: 9199.5. Samples: 12007136. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:39,968][05794] Avg episode reward: [(0, '48.405')] -[2024-07-05 12:45:41,957][06021] Updated weights for policy 0, policy_version 26684 (0.0010) -[2024-07-05 12:45:44,147][06021] Updated weights for policy 0, policy_version 26694 (0.0010) -[2024-07-05 12:45:44,978][05794] Fps is (10 sec: 36829.8, 60 sec: 36721.6, 300 sec: 28129.6). Total num frames: 198696960. Throughput: 0: 9191.2. Samples: 12062284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:44,979][05794] Avg episode reward: [(0, '50.632')] -[2024-07-05 12:45:46,399][06021] Updated weights for policy 0, policy_version 26704 (0.0010) -[2024-07-05 12:45:48,595][06021] Updated weights for policy 0, policy_version 26714 (0.0010) -[2024-07-05 12:45:49,967][05794] Fps is (10 sec: 36863.6, 60 sec: 36868.1, 300 sec: 28158.3). Total num frames: 198885376. Throughput: 0: 9184.5. Samples: 12117300. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:49,968][05794] Avg episode reward: [(0, '51.437')] -[2024-07-05 12:45:50,832][06021] Updated weights for policy 0, policy_version 26724 (0.0010) -[2024-07-05 12:45:53,086][06021] Updated weights for policy 0, policy_version 26734 (0.0009) -[2024-07-05 12:45:54,967][05794] Fps is (10 sec: 36899.3, 60 sec: 36727.5, 300 sec: 28130.5). Total num frames: 199065600. Throughput: 0: 9188.5. Samples: 12144904. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:54,968][05794] Avg episode reward: [(0, '51.519')] -[2024-07-05 12:45:55,283][06021] Updated weights for policy 0, policy_version 26744 (0.0010) -[2024-07-05 12:45:57,513][06021] Updated weights for policy 0, policy_version 26754 (0.0013) -[2024-07-05 12:45:59,785][06021] Updated weights for policy 0, policy_version 26764 (0.0009) -[2024-07-05 12:45:59,967][05794] Fps is (10 sec: 36045.0, 60 sec: 36728.9, 300 sec: 28131.4). Total num frames: 199245824. Throughput: 0: 9186.2. Samples: 12200004. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:45:59,968][05794] Avg episode reward: [(0, '50.425')] -[2024-07-05 12:46:02,007][06021] Updated weights for policy 0, policy_version 26774 (0.0009) -[2024-07-05 12:46:04,231][06021] Updated weights for policy 0, policy_version 26784 (0.0009) -[2024-07-05 12:46:04,967][05794] Fps is (10 sec: 36863.9, 60 sec: 36727.4, 300 sec: 28130.5). Total num frames: 199434240. Throughput: 0: 9198.7. Samples: 12255224. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:46:04,968][05794] Avg episode reward: [(0, '53.790')] -[2024-07-05 12:46:06,445][06021] Updated weights for policy 0, policy_version 26794 (0.0010) -[2024-07-05 12:46:08,691][06021] Updated weights for policy 0, policy_version 26804 (0.0012) -[2024-07-05 12:46:09,968][05794] Fps is (10 sec: 36858.1, 60 sec: 36726.6, 300 sec: 28130.4). Total num frames: 199614464. Throughput: 0: 9173.9. Samples: 12282276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:46:09,970][05794] Avg episode reward: [(0, '53.866')] -[2024-07-05 12:46:10,913][06021] Updated weights for policy 0, policy_version 26814 (0.0010) -[2024-07-05 12:46:13,135][06021] Updated weights for policy 0, policy_version 26824 (0.0010) -[2024-07-05 12:46:14,967][05794] Fps is (10 sec: 36864.4, 60 sec: 36727.5, 300 sec: 28130.5). Total num frames: 199802880. Throughput: 0: 9185.9. Samples: 12337596. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:46:14,968][05794] Avg episode reward: [(0, '51.956')] -[2024-07-05 12:46:15,384][06021] Updated weights for policy 0, policy_version 26834 (0.0012) -[2024-07-05 12:46:17,604][06021] Updated weights for policy 0, policy_version 26844 (0.0013) -[2024-07-05 12:46:19,847][06021] Updated weights for policy 0, policy_version 26854 (0.0009) -[2024-07-05 12:46:19,972][05794] Fps is (10 sec: 36851.8, 60 sec: 36724.4, 300 sec: 28130.0). Total num frames: 199983104. Throughput: 0: 9182.0. Samples: 12392920. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:46:19,973][05794] Avg episode reward: [(0, '50.883')] -[2024-07-05 12:46:20,751][06001] Stopping Batcher_0... -[2024-07-05 12:46:20,751][06001] Loop batcher_evt_loop terminating... -[2024-07-05 12:46:20,751][05794] Component Batcher_0 stopped! -[2024-07-05 12:46:20,755][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026858_200015872.pth... -[2024-07-05 12:46:20,779][06030] Stopping RolloutWorker_w8... -[2024-07-05 12:46:20,780][06030] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 12:46:20,780][06025] Stopping RolloutWorker_w3... -[2024-07-05 12:46:20,780][06025] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 12:46:20,780][05794] Component RolloutWorker_w8 stopped! -[2024-07-05 12:46:20,780][06024] Stopping RolloutWorker_w2... -[2024-07-05 12:46:20,780][06052] Stopping RolloutWorker_w14... -[2024-07-05 12:46:20,781][06024] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 12:46:20,781][06052] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 12:46:20,781][05794] Component RolloutWorker_w3 stopped! -[2024-07-05 12:46:20,781][05794] Component RolloutWorker_w2 stopped! -[2024-07-05 12:46:20,782][05794] Component RolloutWorker_w14 stopped! -[2024-07-05 12:46:20,782][06035] Stopping RolloutWorker_w15... -[2024-07-05 12:46:20,783][06028] Stopping RolloutWorker_w7... -[2024-07-05 12:46:20,783][05794] Component RolloutWorker_w15 stopped! -[2024-07-05 12:46:20,784][06028] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 12:46:20,783][05794] Component RolloutWorker_w7 stopped! -[2024-07-05 12:46:20,784][06035] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 12:46:20,785][06026] Stopping RolloutWorker_w4... -[2024-07-05 12:46:20,785][06051] Stopping RolloutWorker_w13... -[2024-07-05 12:46:20,785][06026] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 12:46:20,785][06051] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 12:46:20,787][06029] Stopping RolloutWorker_w6... -[2024-07-05 12:46:20,787][06029] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 12:46:20,785][05794] Component RolloutWorker_w4 stopped! -[2024-07-05 12:46:20,788][06032] Stopping RolloutWorker_w10... -[2024-07-05 12:46:20,788][06032] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 12:46:20,788][06023] Stopping RolloutWorker_w0... -[2024-07-05 12:46:20,789][06023] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 12:46:20,788][05794] Component RolloutWorker_w13 stopped! -[2024-07-05 12:46:20,789][06022] Stopping RolloutWorker_w1... -[2024-07-05 12:46:20,789][05794] Component RolloutWorker_w6 stopped! -[2024-07-05 12:46:20,790][06022] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 12:46:20,790][05794] Component RolloutWorker_w10 stopped! -[2024-07-05 12:46:20,791][05794] Component RolloutWorker_w0 stopped! -[2024-07-05 12:46:20,791][06033] Stopping RolloutWorker_w11... -[2024-07-05 12:46:20,792][06033] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 12:46:20,792][05794] Component RolloutWorker_w1 stopped! -[2024-07-05 12:46:20,793][05794] Component RolloutWorker_w11 stopped! -[2024-07-05 12:46:20,796][06027] Stopping RolloutWorker_w5... -[2024-07-05 12:46:20,796][06031] Stopping RolloutWorker_w9... -[2024-07-05 12:46:20,796][06031] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 12:46:20,797][06027] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 12:46:20,796][05794] Component RolloutWorker_w5 stopped! -[2024-07-05 12:46:20,797][05794] Component RolloutWorker_w9 stopped! -[2024-07-05 12:46:20,809][06021] Weights refcount: 2 0 -[2024-07-05 12:46:20,810][06021] Stopping InferenceWorker_p0-w0... -[2024-07-05 12:46:20,811][06021] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 12:46:20,811][05794] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 12:46:20,849][06034] Stopping RolloutWorker_w12... -[2024-07-05 12:46:20,849][06034] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 12:46:20,849][05794] Component RolloutWorker_w12 stopped! -[2024-07-05 12:46:20,861][06001] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026316_195575808.pth -[2024-07-05 12:46:20,873][06001] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026858_200015872.pth... -[2024-07-05 12:46:20,987][06001] Stopping LearnerWorker_p0... -[2024-07-05 12:46:20,987][06001] Loop learner_proc0_evt_loop terminating... -[2024-07-05 12:46:20,987][05794] Component LearnerWorker_p0 stopped! -[2024-07-05 12:46:20,988][05794] Waiting for process learner_proc0 to stop... -[2024-07-05 12:46:22,294][05794] Waiting for process inference_proc0-0 to join... -[2024-07-05 12:46:22,295][05794] Waiting for process rollout_proc0 to join... -[2024-07-05 12:46:22,296][05794] Waiting for process rollout_proc1 to join... -[2024-07-05 12:46:22,296][05794] Waiting for process rollout_proc2 to join... -[2024-07-05 12:46:22,297][05794] Waiting for process rollout_proc3 to join... -[2024-07-05 12:46:22,297][05794] Waiting for process rollout_proc4 to join... -[2024-07-05 12:46:22,298][05794] Waiting for process rollout_proc5 to join... -[2024-07-05 12:46:22,298][05794] Waiting for process rollout_proc6 to join... -[2024-07-05 12:46:22,299][05794] Waiting for process rollout_proc7 to join... -[2024-07-05 12:46:22,299][05794] Waiting for process rollout_proc8 to join... -[2024-07-05 12:46:22,299][05794] Waiting for process rollout_proc9 to join... -[2024-07-05 12:46:22,300][05794] Waiting for process rollout_proc10 to join... -[2024-07-05 12:46:22,300][05794] Waiting for process rollout_proc11 to join... -[2024-07-05 12:46:22,301][05794] Waiting for process rollout_proc12 to join... -[2024-07-05 12:46:22,302][05794] Waiting for process rollout_proc13 to join... -[2024-07-05 12:46:22,304][05794] Waiting for process rollout_proc14 to join... -[2024-07-05 12:46:22,305][05794] Waiting for process rollout_proc15 to join... -[2024-07-05 12:46:22,306][05794] Batcher 0 profile tree view: -batching: 127.8786, releasing_batches: 0.1629 -[2024-07-05 12:46:22,307][05794] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0000 - wait_policy_total: 57.6522 -update_model: 21.6168 - weight_update: 0.0010 -one_step: 0.0036 - handle_policy_step: 1341.7551 - deserialize: 98.6255, stack: 7.2322, obs_to_device_normalize: 328.1361, forward: 635.2870, send_messages: 66.6407 - prepare_outputs: 156.7005 - to_cpu: 94.2635 -[2024-07-05 12:46:22,307][05794] Learner 0 profile tree view: -misc: 0.0269, prepare_batch: 134.4631 -train: 402.5293 - epoch_init: 0.0281, minibatch_init: 0.0424, losses_postprocess: 1.7434, kl_divergence: 2.2588, after_optimizer: 3.0663 - calculate_losses: 191.4639 - losses_init: 0.0188, forward_head: 10.6548, bptt_initial: 156.2072, tail: 4.5788, advantages_returns: 1.3535, losses: 6.9534 - bptt: 8.3163 - bptt_forward_core: 7.9239 - update: 201.3764 - clip: 5.7973 -[2024-07-05 12:46:22,308][05794] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.6947, enqueue_policy_requests: 39.5976, env_step: 762.0198, overhead: 71.0542, complete_rollouts: 1.8167 -save_policy_outputs: 69.6854 - split_output_tensors: 32.7119 -[2024-07-05 12:46:22,309][05794] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.6627, enqueue_policy_requests: 40.8541, env_step: 785.0653, overhead: 73.6601, complete_rollouts: 1.6997 -save_policy_outputs: 68.7446 - split_output_tensors: 32.3011 -[2024-07-05 12:46:22,310][05794] Loop Runner_EvtLoop terminating... -[2024-07-05 12:46:22,311][05794] Runner profile tree view: -main_loop: 1481.7354 -[2024-07-05 12:46:22,311][05794] Collected {0: 200015872}, FPS: 33470.5 -[2024-07-05 12:46:34,045][05794] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 12:46:34,046][05794] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 12:46:34,047][05794] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 12:46:34,047][05794] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 12:46:34,048][05794] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 12:46:34,048][05794] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 12:46:34,049][05794] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 12:46:34,049][05794] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 12:46:34,049][05794] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 12:46:34,050][05794] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 12:46:34,051][05794] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 12:46:34,051][05794] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 12:46:34,052][05794] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 12:46:34,052][05794] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 12:46:34,052][05794] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 12:46:34,080][05794] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:46:34,083][05794] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:46:34,084][05794] RunningMeanStd input shape: (1,) -[2024-07-05 12:46:34,097][05794] ConvEncoder: input_channels=3 -[2024-07-05 12:46:34,184][05794] Conv encoder output size: 512 -[2024-07-05 12:46:34,186][05794] Policy head output size: 512 -[2024-07-05 12:46:36,021][05794] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026858_200015872.pth... -[2024-07-05 12:46:36,884][05794] Num frames 100... -[2024-07-05 12:46:36,963][05794] Num frames 200... -[2024-07-05 12:46:37,042][05794] Num frames 300... -[2024-07-05 12:46:37,119][05794] Num frames 400... -[2024-07-05 12:46:37,191][05794] Num frames 500... -[2024-07-05 12:46:37,273][05794] Num frames 600... -[2024-07-05 12:46:37,371][05794] Num frames 700... -[2024-07-05 12:46:37,480][05794] Num frames 800... -[2024-07-05 12:46:37,560][05794] Num frames 900... -[2024-07-05 12:46:37,642][05794] Num frames 1000... -[2024-07-05 12:46:37,723][05794] Num frames 1100... -[2024-07-05 12:46:37,800][05794] Num frames 1200... -[2024-07-05 12:46:37,886][05794] Num frames 1300... -[2024-07-05 12:46:37,963][05794] Num frames 1400... -[2024-07-05 12:46:38,039][05794] Num frames 1500... -[2024-07-05 12:46:38,120][05794] Num frames 1600... -[2024-07-05 12:46:38,202][05794] Num frames 1700... -[2024-07-05 12:46:38,283][05794] Num frames 1800... -[2024-07-05 12:46:38,368][05794] Num frames 1900... -[2024-07-05 12:46:38,468][05794] Num frames 2000... -[2024-07-05 12:46:38,550][05794] Num frames 2100... -[2024-07-05 12:46:38,602][05794] Avg episode rewards: #0: 53.999, true rewards: #0: 21.000 -[2024-07-05 12:46:38,602][05794] Avg episode reward: 53.999, avg true_objective: 21.000 -[2024-07-05 12:46:38,678][05794] Num frames 2200... -[2024-07-05 12:46:38,758][05794] Num frames 2300... -[2024-07-05 12:46:38,837][05794] Num frames 2400... -[2024-07-05 12:46:38,916][05794] Num frames 2500... -[2024-07-05 12:46:38,994][05794] Num frames 2600... -[2024-07-05 12:46:39,076][05794] Num frames 2700... -[2024-07-05 12:46:39,158][05794] Num frames 2800... -[2024-07-05 12:46:39,232][05794] Num frames 2900... -[2024-07-05 12:46:39,309][05794] Num frames 3000... -[2024-07-05 12:46:39,388][05794] Num frames 3100... -[2024-07-05 12:46:39,465][05794] Num frames 3200... -[2024-07-05 12:46:39,544][05794] Num frames 3300... -[2024-07-05 12:46:39,623][05794] Num frames 3400... -[2024-07-05 12:46:39,708][05794] Num frames 3500... -[2024-07-05 12:46:39,787][05794] Num frames 3600... -[2024-07-05 12:46:39,867][05794] Num frames 3700... -[2024-07-05 12:46:39,948][05794] Num frames 3800... -[2024-07-05 12:46:40,027][05794] Num frames 3900... -[2024-07-05 12:46:40,106][05794] Num frames 4000... -[2024-07-05 12:46:40,186][05794] Num frames 4100... -[2024-07-05 12:46:40,265][05794] Num frames 4200... -[2024-07-05 12:46:40,317][05794] Avg episode rewards: #0: 56.999, true rewards: #0: 21.000 -[2024-07-05 12:46:40,319][05794] Avg episode reward: 56.999, avg true_objective: 21.000 -[2024-07-05 12:46:40,395][05794] Num frames 4300... -[2024-07-05 12:46:40,473][05794] Num frames 4400... -[2024-07-05 12:46:40,549][05794] Num frames 4500... -[2024-07-05 12:46:40,625][05794] Num frames 4600... -[2024-07-05 12:46:40,702][05794] Num frames 4700... -[2024-07-05 12:46:40,780][05794] Num frames 4800... -[2024-07-05 12:46:40,857][05794] Num frames 4900... -[2024-07-05 12:46:40,934][05794] Num frames 5000... -[2024-07-05 12:46:41,012][05794] Num frames 5100... -[2024-07-05 12:46:41,088][05794] Num frames 5200... -[2024-07-05 12:46:41,166][05794] Num frames 5300... -[2024-07-05 12:46:41,245][05794] Num frames 5400... -[2024-07-05 12:46:41,323][05794] Num frames 5500... -[2024-07-05 12:46:41,401][05794] Num frames 5600... -[2024-07-05 12:46:41,477][05794] Num frames 5700... -[2024-07-05 12:46:41,552][05794] Num frames 5800... -[2024-07-05 12:46:41,629][05794] Num frames 5900... -[2024-07-05 12:46:41,708][05794] Num frames 6000... -[2024-07-05 12:46:41,787][05794] Num frames 6100... -[2024-07-05 12:46:41,866][05794] Num frames 6200... -[2024-07-05 12:46:41,958][05794] Num frames 6300... -[2024-07-05 12:46:42,010][05794] Avg episode rewards: #0: 55.666, true rewards: #0: 21.000 -[2024-07-05 12:46:42,011][05794] Avg episode reward: 55.666, avg true_objective: 21.000 -[2024-07-05 12:46:42,088][05794] Num frames 6400... -[2024-07-05 12:46:42,165][05794] Num frames 6500... -[2024-07-05 12:46:42,242][05794] Num frames 6600... -[2024-07-05 12:46:42,319][05794] Num frames 6700... -[2024-07-05 12:46:42,394][05794] Num frames 6800... -[2024-07-05 12:46:42,468][05794] Num frames 6900... -[2024-07-05 12:46:42,544][05794] Num frames 7000... -[2024-07-05 12:46:42,623][05794] Num frames 7100... -[2024-07-05 12:46:42,699][05794] Num frames 7200... -[2024-07-05 12:46:42,772][05794] Num frames 7300... -[2024-07-05 12:46:42,843][05794] Num frames 7400... -[2024-07-05 12:46:42,913][05794] Num frames 7500... -[2024-07-05 12:46:42,989][05794] Num frames 7600... -[2024-07-05 12:46:43,066][05794] Num frames 7700... -[2024-07-05 12:46:43,143][05794] Num frames 7800... -[2024-07-05 12:46:43,220][05794] Num frames 7900... -[2024-07-05 12:46:43,296][05794] Num frames 8000... -[2024-07-05 12:46:43,375][05794] Num frames 8100... -[2024-07-05 12:46:43,455][05794] Num frames 8200... -[2024-07-05 12:46:43,531][05794] Num frames 8300... -[2024-07-05 12:46:43,612][05794] Num frames 8400... -[2024-07-05 12:46:43,664][05794] Avg episode rewards: #0: 56.749, true rewards: #0: 21.000 -[2024-07-05 12:46:43,665][05794] Avg episode reward: 56.749, avg true_objective: 21.000 -[2024-07-05 12:46:43,741][05794] Num frames 8500... -[2024-07-05 12:46:43,816][05794] Num frames 8600... -[2024-07-05 12:46:43,893][05794] Num frames 8700... -[2024-07-05 12:46:43,971][05794] Num frames 8800... -[2024-07-05 12:46:44,048][05794] Num frames 8900... -[2024-07-05 12:46:44,125][05794] Num frames 9000... -[2024-07-05 12:46:44,200][05794] Num frames 9100... -[2024-07-05 12:46:44,277][05794] Num frames 9200... -[2024-07-05 12:46:44,358][05794] Num frames 9300... -[2024-07-05 12:46:44,439][05794] Num frames 9400... -[2024-07-05 12:46:44,517][05794] Num frames 9500... -[2024-07-05 12:46:44,615][05794] Num frames 9600... -[2024-07-05 12:46:44,696][05794] Num frames 9700... -[2024-07-05 12:46:44,770][05794] Num frames 9800... -[2024-07-05 12:46:44,846][05794] Num frames 9900... -[2024-07-05 12:46:44,921][05794] Num frames 10000... -[2024-07-05 12:46:45,001][05794] Num frames 10100... -[2024-07-05 12:46:45,082][05794] Num frames 10200... -[2024-07-05 12:46:45,158][05794] Num frames 10300... -[2024-07-05 12:46:45,235][05794] Num frames 10400... -[2024-07-05 12:46:45,315][05794] Num frames 10500... -[2024-07-05 12:46:45,367][05794] Avg episode rewards: #0: 55.999, true rewards: #0: 21.000 -[2024-07-05 12:46:45,368][05794] Avg episode reward: 55.999, avg true_objective: 21.000 -[2024-07-05 12:46:45,446][05794] Num frames 10600... -[2024-07-05 12:46:45,519][05794] Num frames 10700... -[2024-07-05 12:46:45,592][05794] Num frames 10800... -[2024-07-05 12:46:45,670][05794] Num frames 10900... -[2024-07-05 12:46:45,738][05794] Num frames 11000... -[2024-07-05 12:46:45,816][05794] Num frames 11100... -[2024-07-05 12:46:45,882][05794] Num frames 11200... -[2024-07-05 12:46:45,945][05794] Num frames 11300... -[2024-07-05 12:46:46,014][05794] Num frames 11400... -[2024-07-05 12:46:46,084][05794] Num frames 11500... -[2024-07-05 12:46:46,150][05794] Num frames 11600... -[2024-07-05 12:46:46,216][05794] Num frames 11700... -[2024-07-05 12:46:46,298][05794] Num frames 11800... -[2024-07-05 12:46:46,383][05794] Num frames 11900... -[2024-07-05 12:46:46,461][05794] Num frames 12000... -[2024-07-05 12:46:46,539][05794] Num frames 12100... -[2024-07-05 12:46:46,624][05794] Num frames 12200... -[2024-07-05 12:46:46,704][05794] Num frames 12300... -[2024-07-05 12:46:46,783][05794] Num frames 12400... -[2024-07-05 12:46:46,858][05794] Num frames 12500... -[2024-07-05 12:46:46,937][05794] Num frames 12600... -[2024-07-05 12:46:46,989][05794] Avg episode rewards: #0: 56.666, true rewards: #0: 21.000 -[2024-07-05 12:46:46,990][05794] Avg episode reward: 56.666, avg true_objective: 21.000 -[2024-07-05 12:46:47,068][05794] Num frames 12700... -[2024-07-05 12:46:47,147][05794] Num frames 12800... -[2024-07-05 12:46:47,223][05794] Num frames 12900... -[2024-07-05 12:46:47,295][05794] Num frames 13000... -[2024-07-05 12:46:47,368][05794] Num frames 13100... -[2024-07-05 12:46:47,443][05794] Num frames 13200... -[2024-07-05 12:46:47,516][05794] Num frames 13300... -[2024-07-05 12:46:47,597][05794] Avg episode rewards: #0: 51.193, true rewards: #0: 19.051 -[2024-07-05 12:46:47,599][05794] Avg episode reward: 51.193, avg true_objective: 19.051 -[2024-07-05 12:46:47,648][05794] Num frames 13400... -[2024-07-05 12:46:47,722][05794] Num frames 13500... -[2024-07-05 12:46:47,797][05794] Num frames 13600... -[2024-07-05 12:46:47,873][05794] Num frames 13700... -[2024-07-05 12:46:47,963][05794] Num frames 13800... -[2024-07-05 12:46:48,041][05794] Num frames 13900... -[2024-07-05 12:46:48,118][05794] Num frames 14000... -[2024-07-05 12:46:48,195][05794] Num frames 14100... -[2024-07-05 12:46:48,272][05794] Num frames 14200... -[2024-07-05 12:46:48,350][05794] Num frames 14300... -[2024-07-05 12:46:48,428][05794] Num frames 14400... -[2024-07-05 12:46:48,505][05794] Num frames 14500... -[2024-07-05 12:46:48,582][05794] Num frames 14600... -[2024-07-05 12:46:48,658][05794] Num frames 14700... -[2024-07-05 12:46:48,735][05794] Num frames 14800... -[2024-07-05 12:46:48,813][05794] Num frames 14900... -[2024-07-05 12:46:48,890][05794] Num frames 15000... -[2024-07-05 12:46:48,969][05794] Num frames 15100... -[2024-07-05 12:46:49,046][05794] Num frames 15200... -[2024-07-05 12:46:49,126][05794] Num frames 15300... -[2024-07-05 12:46:49,204][05794] Num frames 15400... -[2024-07-05 12:46:49,287][05794] Avg episode rewards: #0: 51.919, true rewards: #0: 19.295 -[2024-07-05 12:46:49,289][05794] Avg episode reward: 51.919, avg true_objective: 19.295 -[2024-07-05 12:46:49,341][05794] Num frames 15500... -[2024-07-05 12:46:49,418][05794] Num frames 15600... -[2024-07-05 12:46:49,495][05794] Num frames 15700... -[2024-07-05 12:46:49,571][05794] Num frames 15800... -[2024-07-05 12:46:49,649][05794] Num frames 15900... -[2024-07-05 12:46:49,726][05794] Num frames 16000... -[2024-07-05 12:46:49,803][05794] Num frames 16100... -[2024-07-05 12:46:49,880][05794] Num frames 16200... -[2024-07-05 12:46:49,957][05794] Num frames 16300... -[2024-07-05 12:46:50,033][05794] Num frames 16400... -[2024-07-05 12:46:50,108][05794] Num frames 16500... -[2024-07-05 12:46:50,185][05794] Num frames 16600... -[2024-07-05 12:46:50,263][05794] Num frames 16700... -[2024-07-05 12:46:50,341][05794] Num frames 16800... -[2024-07-05 12:46:50,417][05794] Num frames 16900... -[2024-07-05 12:46:50,493][05794] Num frames 17000... -[2024-07-05 12:46:50,572][05794] Num frames 17100... -[2024-07-05 12:46:50,650][05794] Num frames 17200... -[2024-07-05 12:46:50,724][05794] Num frames 17300... -[2024-07-05 12:46:50,800][05794] Num frames 17400... -[2024-07-05 12:46:50,879][05794] Num frames 17500... -[2024-07-05 12:46:50,961][05794] Avg episode rewards: #0: 52.595, true rewards: #0: 19.484 -[2024-07-05 12:46:50,963][05794] Avg episode reward: 52.595, avg true_objective: 19.484 -[2024-07-05 12:46:51,028][05794] Num frames 17600... -[2024-07-05 12:46:51,103][05794] Num frames 17700... -[2024-07-05 12:46:51,179][05794] Num frames 17800... -[2024-07-05 12:46:51,256][05794] Num frames 17900... -[2024-07-05 12:46:51,331][05794] Num frames 18000... -[2024-07-05 12:46:51,405][05794] Num frames 18100... -[2024-07-05 12:46:51,482][05794] Num frames 18200... -[2024-07-05 12:46:51,560][05794] Num frames 18300... -[2024-07-05 12:46:51,636][05794] Num frames 18400... -[2024-07-05 12:46:51,711][05794] Num frames 18500... -[2024-07-05 12:46:51,788][05794] Num frames 18600... -[2024-07-05 12:46:51,866][05794] Num frames 18700... -[2024-07-05 12:46:51,944][05794] Num frames 18800... -[2024-07-05 12:46:52,022][05794] Num frames 18900... -[2024-07-05 12:46:52,100][05794] Num frames 19000... -[2024-07-05 12:46:52,177][05794] Num frames 19100... -[2024-07-05 12:46:52,254][05794] Num frames 19200... -[2024-07-05 12:46:52,357][05794] Avg episode rewards: #0: 52.363, true rewards: #0: 19.264 -[2024-07-05 12:46:52,358][05794] Avg episode reward: 52.363, avg true_objective: 19.264 -[2024-07-05 12:47:15,283][05794] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 12:47:54,388][05794] Environment doom_basic already registered, overwriting... -[2024-07-05 12:47:54,389][05794] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 12:47:54,390][05794] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 12:47:54,390][05794] Environment doom_dm already registered, overwriting... -[2024-07-05 12:47:54,390][05794] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 12:47:54,391][05794] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 12:47:54,391][05794] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 12:47:54,392][05794] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 12:47:54,392][05794] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 12:47:54,392][05794] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 12:47:54,393][05794] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 12:47:54,393][05794] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 12:47:54,394][05794] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 12:47:54,394][05794] Environment doom_battle already registered, overwriting... -[2024-07-05 12:47:54,394][05794] Environment doom_battle2 already registered, overwriting... -[2024-07-05 12:47:54,395][05794] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 12:47:54,395][05794] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 12:47:54,396][05794] Environment doom_duel already registered, overwriting... -[2024-07-05 12:47:54,396][05794] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 12:47:54,397][05794] Environment doom_benchmark already registered, overwriting... -[2024-07-05 12:47:54,397][05794] register_encoder_factory: -[2024-07-05 12:47:54,408][05794] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 12:47:54,409][05794] Overriding arg 'train_for_env_steps' with value 250000000 passed from command line -[2024-07-05 12:47:54,415][05794] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 12:47:54,417][05794] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 12:47:54,418][05794] Weights and Biases integration disabled -[2024-07-05 12:47:54,420][05794] Environment var CUDA_VISIBLE_DEVICES is 0 - -[2024-07-05 12:47:57,781][05794] Starting experiment with the following configuration: -help=False -algo=APPO -env=doom_health_gathering_supreme -experiment=default_experiment -train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir -restart_behavior=resume -device=gpu -seed=200 -num_policies=1 -async_rl=True -serial_mode=False -batched_sampling=False -num_batches_to_accumulate=2 -worker_num_splits=2 -policy_workers_per_policy=1 -max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 -num_batches_per_epoch=1 -num_epochs=1 -rollout=32 -recurrence=32 -shuffle_minibatches=False -gamma=0.99 -reward_scale=1.0 -reward_clip=1000.0 -value_bootstrap=False -normalize_returns=True -exploration_loss_coeff=0.001 -value_loss_coeff=0.5 -kl_loss_coeff=0.0 -exploration_loss=symmetric_kl -gae_lambda=0.95 -ppo_clip_ratio=0.1 -ppo_clip_value=0.2 -with_vtrace=False -vtrace_rho=1.0 -vtrace_c=1.0 -optimizer=adam -adam_eps=1e-06 -adam_beta1=0.9 -adam_beta2=0.999 -max_grad_norm=4.0 -learning_rate=0.0001 -lr_schedule=constant -lr_schedule_kl_threshold=0.008 -lr_adaptive_min=1e-06 -lr_adaptive_max=0.01 -obs_subtract_mean=0.0 -obs_scale=255.0 -normalize_input=True -normalize_input_keys=None -decorrelate_experience_max_seconds=0 -decorrelate_envs_on_one_worker=True -actor_worker_gpus=[] -set_workers_cpu_affinity=True -force_envs_single_thread=False -default_niceness=0 -log_to_file=True -experiment_summaries_interval=10 -flush_summaries_interval=30 -stats_avg=100 -summaries_use_frameskip=True -heartbeat_interval=20 -heartbeat_reporting_interval=600 -train_for_env_steps=250000000 -train_for_seconds=10000000000 -save_every_sec=120 -keep_checkpoints=2 -load_checkpoint_kind=latest -save_milestones_sec=-1 -save_best_every_sec=5 -save_best_metric=reward -save_best_after=100000 -benchmark=False -encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple -encoder_conv_mlp_layers=[512] -use_rnn=True -rnn_size=512 -rnn_type=gru -rnn_num_layers=1 -decoder_mlp_layers=[] -nonlinearity=elu -policy_initialization=orthogonal -policy_init_gain=1.0 -actor_critic_share_weights=True -adaptive_stddev=True -continuous_tanh_scale=0.0 -initial_stddev=1.0 -use_env_info_cache=False -env_gpu_actions=False -env_gpu_observations=True -env_frameskip=4 -env_framestack=1 -pixel_format=CHW -use_record_episode_statistics=False -with_wandb=False -wandb_user=None -wandb_project=sample_factory -wandb_group=None -wandb_job_type=SF -wandb_tags=[] -with_pbt=False -pbt_mix_policies_in_one_env=True -pbt_period_env_steps=5000000 -pbt_start_mutation=20000000 -pbt_replace_fraction=0.3 -pbt_mutation_rate=0.15 -pbt_replace_reward_gap=0.1 -pbt_replace_reward_gap_absolute=1e-06 -pbt_optimize_gamma=False -pbt_target_objective=true_objective -pbt_perturb_min=1.1 -pbt_perturb_max=1.5 -num_agents=-1 -num_humans=0 -num_bots=-1 -start_bot_difficulty=None -timelimit=None -res_w=128 -res_h=72 -wide_aspect_ratio=False -eval_env_frameskip=1 -fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} -git_hash=unknown -git_repo_name=not a git repository -[2024-07-05 12:47:57,782][05794] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 12:47:57,783][05794] Rollout worker 0 uses device cpu -[2024-07-05 12:47:57,783][05794] Rollout worker 1 uses device cpu -[2024-07-05 12:47:57,784][05794] Rollout worker 2 uses device cpu -[2024-07-05 12:47:57,784][05794] Rollout worker 3 uses device cpu -[2024-07-05 12:47:57,785][05794] Rollout worker 4 uses device cpu -[2024-07-05 12:47:57,785][05794] Rollout worker 5 uses device cpu -[2024-07-05 12:47:57,785][05794] Rollout worker 6 uses device cpu -[2024-07-05 12:47:57,786][05794] Rollout worker 7 uses device cpu -[2024-07-05 12:47:57,786][05794] Rollout worker 8 uses device cpu -[2024-07-05 12:47:57,786][05794] Rollout worker 9 uses device cpu -[2024-07-05 12:47:57,787][05794] Rollout worker 10 uses device cpu -[2024-07-05 12:47:57,787][05794] Rollout worker 11 uses device cpu -[2024-07-05 12:47:57,787][05794] Rollout worker 12 uses device cpu -[2024-07-05 12:47:57,788][05794] Rollout worker 13 uses device cpu -[2024-07-05 12:47:57,788][05794] Rollout worker 14 uses device cpu -[2024-07-05 12:47:57,788][05794] Rollout worker 15 uses device cpu -[2024-07-05 12:47:57,895][05794] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:47:57,896][05794] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 12:47:57,990][05794] Starting all processes... -[2024-07-05 12:47:57,991][05794] Starting process learner_proc0 -[2024-07-05 12:47:58,040][05794] Starting all processes... -[2024-07-05 12:47:58,045][05794] Starting process inference_proc0-0 -[2024-07-05 12:47:58,045][05794] Starting process rollout_proc0 -[2024-07-05 12:47:58,045][05794] Starting process rollout_proc1 -[2024-07-05 12:47:58,046][05794] Starting process rollout_proc2 -[2024-07-05 12:47:58,046][05794] Starting process rollout_proc3 -[2024-07-05 12:47:58,046][05794] Starting process rollout_proc4 -[2024-07-05 12:47:58,046][05794] Starting process rollout_proc5 -[2024-07-05 12:47:58,047][05794] Starting process rollout_proc6 -[2024-07-05 12:47:58,051][05794] Starting process rollout_proc7 -[2024-07-05 12:47:58,052][05794] Starting process rollout_proc8 -[2024-07-05 12:47:58,052][05794] Starting process rollout_proc9 -[2024-07-05 12:47:58,054][05794] Starting process rollout_proc10 -[2024-07-05 12:47:58,059][05794] Starting process rollout_proc11 -[2024-07-05 12:47:58,065][05794] Starting process rollout_proc12 -[2024-07-05 12:47:58,065][05794] Starting process rollout_proc13 -[2024-07-05 12:47:58,069][05794] Starting process rollout_proc14 -[2024-07-05 12:47:58,086][05794] Starting process rollout_proc15 -[2024-07-05 12:48:02,073][09124] Worker 4 uses CPU cores [4] -[2024-07-05 12:48:02,184][09128] Worker 7 uses CPU cores [7] -[2024-07-05 12:48:02,205][09149] Worker 14 uses CPU cores [14] -[2024-07-05 12:48:02,329][09148] Worker 12 uses CPU cores [12] -[2024-07-05 12:48:02,335][09099] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:48:02,335][09099] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 12:48:02,430][09129] Worker 9 uses CPU cores [9] -[2024-07-05 12:48:02,439][09099] Num visible devices: 1 -[2024-07-05 12:48:02,482][09099] Setting fixed seed 200 -[2024-07-05 12:48:02,493][09099] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:48:02,494][09099] Initializing actor-critic model on device cuda:0 -[2024-07-05 12:48:02,495][09099] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:48:02,496][09099] RunningMeanStd input shape: (1,) -[2024-07-05 12:48:02,497][09126] Worker 5 uses CPU cores [5] -[2024-07-05 12:48:02,505][09151] Worker 15 uses CPU cores [15] -[2024-07-05 12:48:02,510][09099] ConvEncoder: input_channels=3 -[2024-07-05 12:48:02,561][09127] Worker 8 uses CPU cores [8] -[2024-07-05 12:48:02,619][09099] Conv encoder output size: 512 -[2024-07-05 12:48:02,619][09099] Policy head output size: 512 -[2024-07-05 12:48:02,632][09099] Created Actor Critic model with architecture: -[2024-07-05 12:48:02,632][09099] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (6): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (7): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 12:48:02,651][09122] Worker 2 uses CPU cores [2] -[2024-07-05 12:48:02,673][09121] Worker 1 uses CPU cores [1] -[2024-07-05 12:48:02,706][09130] Worker 10 uses CPU cores [10] -[2024-07-05 12:48:02,709][09125] Worker 6 uses CPU cores [6] -[2024-07-05 12:48:02,733][09120] Worker 0 uses CPU cores [0] -[2024-07-05 12:48:02,750][09099] Using optimizer -[2024-07-05 12:48:02,765][09119] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:48:02,765][09119] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 12:48:02,775][09131] Worker 11 uses CPU cores [11] -[2024-07-05 12:48:02,788][09123] Worker 3 uses CPU cores [3] -[2024-07-05 12:48:02,811][09119] Num visible devices: 1 -[2024-07-05 12:48:02,999][09147] Worker 13 uses CPU cores [13] -[2024-07-05 12:48:03,427][09099] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026858_200015872.pth... -[2024-07-05 12:48:03,453][09099] Loading model from checkpoint -[2024-07-05 12:48:03,454][09099] Loaded experiment state at self.train_step=26858, self.env_steps=200015872 -[2024-07-05 12:48:03,455][09099] Initialized policy 0 weights for model version 26858 -[2024-07-05 12:48:03,456][09099] LearnerWorker_p0 finished initialization! -[2024-07-05 12:48:03,456][09099] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 12:48:03,529][09119] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 12:48:03,530][09119] RunningMeanStd input shape: (1,) -[2024-07-05 12:48:03,538][09119] ConvEncoder: input_channels=3 -[2024-07-05 12:48:03,597][09119] Conv encoder output size: 512 -[2024-07-05 12:48:03,597][09119] Policy head output size: 512 -[2024-07-05 12:48:03,633][05794] Inference worker 0-0 is ready! -[2024-07-05 12:48:03,634][05794] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 12:48:03,704][09125] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,706][09126] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,706][09129] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,710][09127] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,711][09131] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,711][09123] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,711][09149] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,712][09120] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,715][09147] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,715][09151] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,719][09121] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,720][09124] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,725][09148] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,726][09122] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,736][09130] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:03,860][09128] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 12:48:04,347][09127] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,351][09120] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,351][09125] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,351][09131] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,352][09148] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,355][09124] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,355][09126] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,420][05794] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 200015872. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 12:48:04,531][09127] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,533][09125] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,533][09148] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,593][09120] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,602][09122] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,618][09123] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,624][09121] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,720][09130] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,732][09126] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,743][09148] Decorrelating experience for 64 frames... -[2024-07-05 12:48:04,753][09127] Decorrelating experience for 64 frames... -[2024-07-05 12:48:04,768][09125] Decorrelating experience for 64 frames... -[2024-07-05 12:48:04,800][09121] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,863][09147] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,874][09129] Decorrelating experience for 0 frames... -[2024-07-05 12:48:04,961][09130] Decorrelating experience for 32 frames... -[2024-07-05 12:48:04,981][09126] Decorrelating experience for 64 frames... -[2024-07-05 12:48:04,988][09125] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,018][09122] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,035][09123] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,043][09121] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,051][09127] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,060][09124] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,146][09129] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,153][09120] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,206][09131] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,239][09149] Decorrelating experience for 0 frames... -[2024-07-05 12:48:05,259][09147] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,295][09124] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,351][09151] Decorrelating experience for 0 frames... -[2024-07-05 12:48:05,355][09120] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,397][09128] Decorrelating experience for 0 frames... -[2024-07-05 12:48:05,400][09126] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,474][09129] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,520][09147] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,553][09151] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,595][09148] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,618][09127] Decorrelating experience for 128 frames... -[2024-07-05 12:48:05,630][09124] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,676][09128] Decorrelating experience for 32 frames... -[2024-07-05 12:48:05,676][09125] Decorrelating experience for 128 frames... -[2024-07-05 12:48:05,723][09120] Decorrelating experience for 128 frames... -[2024-07-05 12:48:05,753][09121] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,824][09147] Decorrelating experience for 96 frames... -[2024-07-05 12:48:05,858][09123] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,883][09126] Decorrelating experience for 128 frames... -[2024-07-05 12:48:05,892][09130] Decorrelating experience for 64 frames... -[2024-07-05 12:48:05,939][09125] Decorrelating experience for 160 frames... -[2024-07-05 12:48:05,972][09129] Decorrelating experience for 96 frames... -[2024-07-05 12:48:06,007][09149] Decorrelating experience for 32 frames... -[2024-07-05 12:48:06,017][09148] Decorrelating experience for 128 frames... -[2024-07-05 12:48:06,069][09124] Decorrelating experience for 128 frames... -[2024-07-05 12:48:06,073][09120] Decorrelating experience for 160 frames... -[2024-07-05 12:48:06,112][09122] Decorrelating experience for 64 frames... -[2024-07-05 12:48:06,182][09130] Decorrelating experience for 96 frames... -[2024-07-05 12:48:06,221][09125] Decorrelating experience for 192 frames... -[2024-07-05 12:48:06,251][09126] Decorrelating experience for 160 frames... -[2024-07-05 12:48:06,260][09147] Decorrelating experience for 128 frames... -[2024-07-05 12:48:06,315][09149] Decorrelating experience for 64 frames... -[2024-07-05 12:48:06,315][09151] Decorrelating experience for 64 frames... -[2024-07-05 12:48:06,316][09121] Decorrelating experience for 128 frames... -[2024-07-05 12:48:06,336][09127] Decorrelating experience for 160 frames... -[2024-07-05 12:48:06,425][09124] Decorrelating experience for 160 frames... -[2024-07-05 12:48:06,503][09130] Decorrelating experience for 128 frames... -[2024-07-05 12:48:06,518][09122] Decorrelating experience for 96 frames... -[2024-07-05 12:48:06,518][09147] Decorrelating experience for 160 frames... -[2024-07-05 12:48:06,563][09129] Decorrelating experience for 128 frames... -[2024-07-05 12:48:06,564][09128] Decorrelating experience for 64 frames... -[2024-07-05 12:48:06,568][09120] Decorrelating experience for 192 frames... -[2024-07-05 12:48:06,577][09123] Decorrelating experience for 96 frames... -[2024-07-05 12:48:06,608][09126] Decorrelating experience for 192 frames... -[2024-07-05 12:48:06,726][09149] Decorrelating experience for 96 frames... -[2024-07-05 12:48:06,796][09131] Decorrelating experience for 64 frames... -[2024-07-05 12:48:06,810][09128] Decorrelating experience for 96 frames... -[2024-07-05 12:48:06,820][09124] Decorrelating experience for 192 frames... -[2024-07-05 12:48:06,821][09147] Decorrelating experience for 192 frames... -[2024-07-05 12:48:06,918][09129] Decorrelating experience for 160 frames... -[2024-07-05 12:48:06,918][09120] Decorrelating experience for 224 frames... -[2024-07-05 12:48:06,923][09127] Decorrelating experience for 192 frames... -[2024-07-05 12:48:07,015][09121] Decorrelating experience for 160 frames... -[2024-07-05 12:48:07,037][09151] Decorrelating experience for 96 frames... -[2024-07-05 12:48:07,038][09131] Decorrelating experience for 96 frames... -[2024-07-05 12:48:07,107][09123] Decorrelating experience for 128 frames... -[2024-07-05 12:48:07,129][09122] Decorrelating experience for 128 frames... -[2024-07-05 12:48:07,206][09126] Decorrelating experience for 224 frames... -[2024-07-05 12:48:07,250][09124] Decorrelating experience for 224 frames... -[2024-07-05 12:48:07,285][09129] Decorrelating experience for 192 frames... -[2024-07-05 12:48:07,315][09130] Decorrelating experience for 160 frames... -[2024-07-05 12:48:07,338][09148] Decorrelating experience for 160 frames... -[2024-07-05 12:48:07,431][09122] Decorrelating experience for 160 frames... -[2024-07-05 12:48:07,449][09149] Decorrelating experience for 128 frames... -[2024-07-05 12:48:07,467][09151] Decorrelating experience for 128 frames... -[2024-07-05 12:48:07,474][09131] Decorrelating experience for 128 frames... -[2024-07-05 12:48:07,565][09127] Decorrelating experience for 224 frames... -[2024-07-05 12:48:07,601][09147] Decorrelating experience for 224 frames... -[2024-07-05 12:48:07,605][09125] Decorrelating experience for 224 frames... -[2024-07-05 12:48:07,678][09129] Decorrelating experience for 224 frames... -[2024-07-05 12:48:07,737][09130] Decorrelating experience for 192 frames... -[2024-07-05 12:48:07,796][09149] Decorrelating experience for 160 frames... -[2024-07-05 12:48:07,874][09121] Decorrelating experience for 192 frames... -[2024-07-05 12:48:07,895][09151] Decorrelating experience for 160 frames... -[2024-07-05 12:48:07,924][09128] Decorrelating experience for 128 frames... -[2024-07-05 12:48:07,988][09131] Decorrelating experience for 160 frames... -[2024-07-05 12:48:08,173][09149] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,176][09130] Decorrelating experience for 224 frames... -[2024-07-05 12:48:08,204][09123] Decorrelating experience for 160 frames... -[2024-07-05 12:48:08,268][09121] Decorrelating experience for 224 frames... -[2024-07-05 12:48:08,271][09148] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,280][09128] Decorrelating experience for 160 frames... -[2024-07-05 12:48:08,387][09131] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,519][09122] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,572][09149] Decorrelating experience for 224 frames... -[2024-07-05 12:48:08,653][09128] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,688][09148] Decorrelating experience for 224 frames... -[2024-07-05 12:48:08,758][09123] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,770][09099] Signal inference workers to stop experience collection... -[2024-07-05 12:48:08,776][09119] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 12:48:08,817][09131] Decorrelating experience for 224 frames... -[2024-07-05 12:48:08,845][09151] Decorrelating experience for 192 frames... -[2024-07-05 12:48:08,950][09122] Decorrelating experience for 224 frames... -[2024-07-05 12:48:09,020][09128] Decorrelating experience for 224 frames... -[2024-07-05 12:48:09,178][09123] Decorrelating experience for 224 frames... -[2024-07-05 12:48:09,189][09151] Decorrelating experience for 224 frames... -[2024-07-05 12:48:09,420][05794] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 200015872. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 12:48:09,421][05794] Avg episode reward: [(0, '2.183')] -[2024-07-05 12:48:10,366][09099] Signal inference workers to resume experience collection... -[2024-07-05 12:48:10,367][09119] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 12:48:12,712][09119] Updated weights for policy 0, policy_version 26868 (0.0095) -[2024-07-05 12:48:14,420][05794] Fps is (10 sec: 13926.2, 60 sec: 13926.2, 300 sec: 13926.2). Total num frames: 200155136. Throughput: 0: 2250.0. Samples: 22500. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:48:14,422][05794] Avg episode reward: [(0, '7.767')] -[2024-07-05 12:48:15,120][09119] Updated weights for policy 0, policy_version 26878 (0.0010) -[2024-07-05 12:48:17,539][09119] Updated weights for policy 0, policy_version 26888 (0.0010) -[2024-07-05 12:48:17,883][05794] Heartbeat connected on Batcher_0 -[2024-07-05 12:48:17,888][05794] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 12:48:17,903][05794] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 12:48:17,905][05794] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 12:48:17,906][05794] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 12:48:17,911][05794] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 12:48:17,912][05794] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 12:48:17,919][05794] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 12:48:17,926][05794] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 12:48:17,926][05794] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 12:48:17,927][05794] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 12:48:17,973][05794] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 12:48:17,974][05794] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 12:48:17,977][05794] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 12:48:17,979][05794] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 12:48:17,984][05794] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 12:48:17,990][05794] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 12:48:17,995][05794] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 12:48:17,997][05794] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 12:48:19,420][05794] Fps is (10 sec: 31129.8, 60 sec: 20753.1, 300 sec: 20753.1). Total num frames: 200327168. Throughput: 0: 4937.1. Samples: 74056. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:48:19,421][05794] Avg episode reward: [(0, '55.444')] -[2024-07-05 12:48:19,422][09099] Saving new best policy, reward=55.444! -[2024-07-05 12:48:19,863][09119] Updated weights for policy 0, policy_version 26898 (0.0010) -[2024-07-05 12:48:22,144][09119] Updated weights for policy 0, policy_version 26908 (0.0010) -[2024-07-05 12:48:24,345][09119] Updated weights for policy 0, policy_version 26918 (0.0010) -[2024-07-05 12:48:24,420][05794] Fps is (10 sec: 35226.0, 60 sec: 24576.0, 300 sec: 24576.0). Total num frames: 200507392. Throughput: 0: 5030.8. Samples: 100616. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 12:48:24,421][05794] Avg episode reward: [(0, '50.190')] -[2024-07-05 12:48:26,516][09119] Updated weights for policy 0, policy_version 26928 (0.0011) -[2024-07-05 12:48:28,798][09119] Updated weights for policy 0, policy_version 26938 (0.0012) -[2024-07-05 12:48:29,420][05794] Fps is (10 sec: 36863.9, 60 sec: 27197.4, 300 sec: 27197.4). Total num frames: 200695808. Throughput: 0: 6237.4. Samples: 155936. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:48:29,421][05794] Avg episode reward: [(0, '51.687')] -[2024-07-05 12:48:30,966][09119] Updated weights for policy 0, policy_version 26948 (0.0009) -[2024-07-05 12:48:33,226][09119] Updated weights for policy 0, policy_version 26958 (0.0013) -[2024-07-05 12:48:34,429][05794] Fps is (10 sec: 36831.7, 60 sec: 28663.6, 300 sec: 28663.6). Total num frames: 200876032. Throughput: 0: 7059.4. Samples: 211844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:48:34,431][05794] Avg episode reward: [(0, '53.481')] -[2024-07-05 12:48:35,371][09119] Updated weights for policy 0, policy_version 26968 (0.0011) -[2024-07-05 12:48:37,553][09119] Updated weights for policy 0, policy_version 26978 (0.0008) -[2024-07-05 12:48:39,420][05794] Fps is (10 sec: 36863.8, 60 sec: 29959.3, 300 sec: 29959.3). Total num frames: 201064448. Throughput: 0: 6859.0. Samples: 240064. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:48:39,421][05794] Avg episode reward: [(0, '51.591')] -[2024-07-05 12:48:39,790][09119] Updated weights for policy 0, policy_version 26988 (0.0009) -[2024-07-05 12:48:41,933][09119] Updated weights for policy 0, policy_version 26998 (0.0009) -[2024-07-05 12:48:44,203][09119] Updated weights for policy 0, policy_version 27008 (0.0009) -[2024-07-05 12:48:44,420][05794] Fps is (10 sec: 37715.6, 60 sec: 30924.7, 300 sec: 30924.7). Total num frames: 201252864. Throughput: 0: 7394.9. Samples: 295796. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:48:44,421][05794] Avg episode reward: [(0, '52.844')] -[2024-07-05 12:48:46,386][09119] Updated weights for policy 0, policy_version 27018 (0.0009) -[2024-07-05 12:48:48,610][09119] Updated weights for policy 0, policy_version 27028 (0.0010) -[2024-07-05 12:48:49,427][05794] Fps is (10 sec: 36840.9, 60 sec: 31489.3, 300 sec: 31489.3). Total num frames: 201433088. Throughput: 0: 7805.7. Samples: 351304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:48:49,428][05794] Avg episode reward: [(0, '51.655')] -[2024-07-05 12:48:50,782][09119] Updated weights for policy 0, policy_version 27038 (0.0010) -[2024-07-05 12:48:53,031][09119] Updated weights for policy 0, policy_version 27048 (0.0009) -[2024-07-05 12:48:54,420][05794] Fps is (10 sec: 36864.6, 60 sec: 32112.6, 300 sec: 32112.6). Total num frames: 201621504. Throughput: 0: 8434.0. Samples: 379528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:48:54,421][05794] Avg episode reward: [(0, '52.356')] -[2024-07-05 12:48:55,173][09119] Updated weights for policy 0, policy_version 27058 (0.0009) -[2024-07-05 12:48:57,352][09119] Updated weights for policy 0, policy_version 27068 (0.0011) -[2024-07-05 12:48:59,420][05794] Fps is (10 sec: 37707.3, 60 sec: 32619.1, 300 sec: 32619.1). Total num frames: 201809920. Throughput: 0: 9181.6. Samples: 435672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:48:59,421][05794] Avg episode reward: [(0, '50.218')] -[2024-07-05 12:48:59,495][09119] Updated weights for policy 0, policy_version 27078 (0.0009) -[2024-07-05 12:49:01,670][09119] Updated weights for policy 0, policy_version 27088 (0.0010) -[2024-07-05 12:49:03,854][09119] Updated weights for policy 0, policy_version 27098 (0.0012) -[2024-07-05 12:49:04,420][05794] Fps is (10 sec: 37682.6, 60 sec: 33041.0, 300 sec: 33041.0). Total num frames: 201998336. Throughput: 0: 9292.4. Samples: 492216. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:04,422][05794] Avg episode reward: [(0, '51.405')] -[2024-07-05 12:49:06,015][09119] Updated weights for policy 0, policy_version 27108 (0.0009) -[2024-07-05 12:49:07,760][09119] Updated weights for policy 0, policy_version 27118 (0.0015) -[2024-07-05 12:49:09,420][05794] Fps is (10 sec: 40960.1, 60 sec: 36727.5, 300 sec: 33902.3). Total num frames: 202219520. Throughput: 0: 9346.2. Samples: 521196. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:09,421][05794] Avg episode reward: [(0, '51.848')] -[2024-07-05 12:49:09,467][09119] Updated weights for policy 0, policy_version 27128 (0.0008) -[2024-07-05 12:49:11,154][09119] Updated weights for policy 0, policy_version 27138 (0.0010) -[2024-07-05 12:49:12,836][09119] Updated weights for policy 0, policy_version 27148 (0.0010) -[2024-07-05 12:49:14,420][05794] Fps is (10 sec: 46695.2, 60 sec: 38502.5, 300 sec: 34991.5). Total num frames: 202465280. Throughput: 0: 9740.2. Samples: 594244. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:14,421][05794] Avg episode reward: [(0, '51.512')] -[2024-07-05 12:49:14,531][09119] Updated weights for policy 0, policy_version 27158 (0.0008) -[2024-07-05 12:49:16,182][09119] Updated weights for policy 0, policy_version 27168 (0.0007) -[2024-07-05 12:49:17,936][09119] Updated weights for policy 0, policy_version 27178 (0.0007) -[2024-07-05 12:49:19,420][05794] Fps is (10 sec: 48332.6, 60 sec: 39594.7, 300 sec: 35826.4). Total num frames: 202702848. Throughput: 0: 10113.9. Samples: 666880. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:19,421][05794] Avg episode reward: [(0, '53.590')] -[2024-07-05 12:49:19,606][09119] Updated weights for policy 0, policy_version 27188 (0.0008) -[2024-07-05 12:49:21,254][09119] Updated weights for policy 0, policy_version 27198 (0.0009) -[2024-07-05 12:49:22,952][09119] Updated weights for policy 0, policy_version 27208 (0.0014) -[2024-07-05 12:49:24,420][05794] Fps is (10 sec: 48332.8, 60 sec: 40686.9, 300 sec: 36659.2). Total num frames: 202948608. Throughput: 0: 10296.5. Samples: 703408. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:24,421][05794] Avg episode reward: [(0, '52.303')] -[2024-07-05 12:49:24,647][09119] Updated weights for policy 0, policy_version 27218 (0.0007) -[2024-07-05 12:49:26,366][09119] Updated weights for policy 0, policy_version 27228 (0.0009) -[2024-07-05 12:49:28,068][09119] Updated weights for policy 0, policy_version 27238 (0.0007) -[2024-07-05 12:49:29,420][05794] Fps is (10 sec: 49152.1, 60 sec: 41642.7, 300 sec: 37394.1). Total num frames: 203194368. Throughput: 0: 10677.6. Samples: 776284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:29,421][05794] Avg episode reward: [(0, '50.338')] -[2024-07-05 12:49:29,716][09119] Updated weights for policy 0, policy_version 27248 (0.0010) -[2024-07-05 12:49:31,421][09119] Updated weights for policy 0, policy_version 27258 (0.0009) -[2024-07-05 12:49:33,097][09119] Updated weights for policy 0, policy_version 27268 (0.0008) -[2024-07-05 12:49:34,420][05794] Fps is (10 sec: 48333.1, 60 sec: 42604.7, 300 sec: 37956.3). Total num frames: 203431936. Throughput: 0: 11063.6. Samples: 849096. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:34,421][05794] Avg episode reward: [(0, '51.086')] -[2024-07-05 12:49:34,778][09119] Updated weights for policy 0, policy_version 27278 (0.0007) -[2024-07-05 12:49:36,406][09119] Updated weights for policy 0, policy_version 27288 (0.0008) -[2024-07-05 12:49:38,104][09119] Updated weights for policy 0, policy_version 27298 (0.0008) -[2024-07-05 12:49:39,420][05794] Fps is (10 sec: 48332.5, 60 sec: 43554.2, 300 sec: 38545.5). Total num frames: 203677696. Throughput: 0: 11245.7. Samples: 885584. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:39,421][05794] Avg episode reward: [(0, '48.010')] -[2024-07-05 12:49:39,776][09119] Updated weights for policy 0, policy_version 27308 (0.0009) -[2024-07-05 12:49:41,483][09119] Updated weights for policy 0, policy_version 27318 (0.0008) -[2024-07-05 12:49:43,158][09119] Updated weights for policy 0, policy_version 27328 (0.0007) -[2024-07-05 12:49:44,420][05794] Fps is (10 sec: 49150.9, 60 sec: 44509.9, 300 sec: 39075.8). Total num frames: 203923456. Throughput: 0: 11624.9. Samples: 958796. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:44,421][05794] Avg episode reward: [(0, '47.915')] -[2024-07-05 12:49:44,849][09119] Updated weights for policy 0, policy_version 27338 (0.0008) -[2024-07-05 12:49:46,541][09119] Updated weights for policy 0, policy_version 27348 (0.0007) -[2024-07-05 12:49:48,239][09119] Updated weights for policy 0, policy_version 27358 (0.0008) -[2024-07-05 12:49:49,420][05794] Fps is (10 sec: 49152.3, 60 sec: 45607.0, 300 sec: 39555.7). Total num frames: 204169216. Throughput: 0: 11985.9. Samples: 1031580. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:49,421][05794] Avg episode reward: [(0, '50.782')] -[2024-07-05 12:49:49,903][09119] Updated weights for policy 0, policy_version 27368 (0.0009) -[2024-07-05 12:49:51,564][09119] Updated weights for policy 0, policy_version 27378 (0.0007) -[2024-07-05 12:49:53,264][09119] Updated weights for policy 0, policy_version 27388 (0.0010) -[2024-07-05 12:49:54,420][05794] Fps is (10 sec: 48333.7, 60 sec: 46421.4, 300 sec: 39917.4). Total num frames: 204406784. Throughput: 0: 12157.1. Samples: 1068268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:54,421][05794] Avg episode reward: [(0, '49.126')] -[2024-07-05 12:49:54,471][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000027395_204414976.pth... -[2024-07-05 12:49:54,541][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026653_198336512.pth -[2024-07-05 12:49:54,970][09119] Updated weights for policy 0, policy_version 27398 (0.0008) -[2024-07-05 12:49:56,620][09119] Updated weights for policy 0, policy_version 27408 (0.0009) -[2024-07-05 12:49:58,324][09119] Updated weights for policy 0, policy_version 27418 (0.0010) -[2024-07-05 12:49:59,420][05794] Fps is (10 sec: 48332.5, 60 sec: 47377.0, 300 sec: 40318.9). Total num frames: 204652544. Throughput: 0: 12155.5. Samples: 1141240. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:49:59,421][05794] Avg episode reward: [(0, '48.799')] -[2024-07-05 12:49:59,993][09119] Updated weights for policy 0, policy_version 27428 (0.0008) -[2024-07-05 12:50:01,701][09119] Updated weights for policy 0, policy_version 27438 (0.0008) -[2024-07-05 12:50:03,375][09119] Updated weights for policy 0, policy_version 27448 (0.0009) -[2024-07-05 12:50:04,420][05794] Fps is (10 sec: 49152.0, 60 sec: 48333.0, 300 sec: 40686.9). Total num frames: 204898304. Throughput: 0: 12162.0. Samples: 1214172. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:04,421][05794] Avg episode reward: [(0, '52.361')] -[2024-07-05 12:50:05,079][09119] Updated weights for policy 0, policy_version 27458 (0.0011) -[2024-07-05 12:50:06,750][09119] Updated weights for policy 0, policy_version 27468 (0.0010) -[2024-07-05 12:50:08,455][09119] Updated weights for policy 0, policy_version 27478 (0.0008) -[2024-07-05 12:50:09,420][05794] Fps is (10 sec: 48332.1, 60 sec: 48605.7, 300 sec: 40959.9). Total num frames: 205135872. Throughput: 0: 12166.6. Samples: 1250908. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:09,421][05794] Avg episode reward: [(0, '49.252')] -[2024-07-05 12:50:10,084][09119] Updated weights for policy 0, policy_version 27488 (0.0008) -[2024-07-05 12:50:11,775][09119] Updated weights for policy 0, policy_version 27498 (0.0007) -[2024-07-05 12:50:13,446][09119] Updated weights for policy 0, policy_version 27508 (0.0008) -[2024-07-05 12:50:14,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48605.9, 300 sec: 41275.1). Total num frames: 205381632. Throughput: 0: 12168.3. Samples: 1323856. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:14,421][05794] Avg episode reward: [(0, '50.335')] -[2024-07-05 12:50:15,112][09119] Updated weights for policy 0, policy_version 27518 (0.0010) -[2024-07-05 12:50:16,791][09119] Updated weights for policy 0, policy_version 27528 (0.0008) -[2024-07-05 12:50:18,519][09119] Updated weights for policy 0, policy_version 27538 (0.0010) -[2024-07-05 12:50:19,420][05794] Fps is (10 sec: 49152.7, 60 sec: 48742.4, 300 sec: 41566.8). Total num frames: 205627392. Throughput: 0: 12173.1. Samples: 1396888. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:50:19,421][05794] Avg episode reward: [(0, '52.391')] -[2024-07-05 12:50:20,250][09119] Updated weights for policy 0, policy_version 27548 (0.0008) -[2024-07-05 12:50:21,952][09119] Updated weights for policy 0, policy_version 27558 (0.0008) -[2024-07-05 12:50:23,627][09119] Updated weights for policy 0, policy_version 27568 (0.0008) -[2024-07-05 12:50:24,420][05794] Fps is (10 sec: 48332.4, 60 sec: 48605.8, 300 sec: 41779.2). Total num frames: 205864960. Throughput: 0: 12150.3. Samples: 1432348. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:50:24,421][05794] Avg episode reward: [(0, '51.673')] -[2024-07-05 12:50:25,294][09119] Updated weights for policy 0, policy_version 27578 (0.0010) -[2024-07-05 12:50:26,992][09119] Updated weights for policy 0, policy_version 27588 (0.0011) -[2024-07-05 12:50:28,646][09119] Updated weights for policy 0, policy_version 27598 (0.0008) -[2024-07-05 12:50:29,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48605.8, 300 sec: 42033.4). Total num frames: 206110720. Throughput: 0: 12148.9. Samples: 1505496. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:50:29,422][05794] Avg episode reward: [(0, '48.058')] -[2024-07-05 12:50:30,369][09119] Updated weights for policy 0, policy_version 27608 (0.0010) -[2024-07-05 12:50:32,056][09119] Updated weights for policy 0, policy_version 27618 (0.0012) -[2024-07-05 12:50:33,737][09119] Updated weights for policy 0, policy_version 27628 (0.0008) -[2024-07-05 12:50:34,420][05794] Fps is (10 sec: 49152.2, 60 sec: 48742.3, 300 sec: 42270.7). Total num frames: 206356480. Throughput: 0: 12156.0. Samples: 1578600. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:34,421][05794] Avg episode reward: [(0, '52.572')] -[2024-07-05 12:50:35,377][09119] Updated weights for policy 0, policy_version 27638 (0.0007) -[2024-07-05 12:50:37,108][09119] Updated weights for policy 0, policy_version 27648 (0.0008) -[2024-07-05 12:50:38,844][09119] Updated weights for policy 0, policy_version 27658 (0.0007) -[2024-07-05 12:50:39,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48605.9, 300 sec: 42439.8). Total num frames: 206594048. Throughput: 0: 12151.2. Samples: 1615072. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:39,421][05794] Avg episode reward: [(0, '51.798')] -[2024-07-05 12:50:40,507][09119] Updated weights for policy 0, policy_version 27668 (0.0007) -[2024-07-05 12:50:42,195][09119] Updated weights for policy 0, policy_version 27678 (0.0008) -[2024-07-05 12:50:43,838][09119] Updated weights for policy 0, policy_version 27688 (0.0007) -[2024-07-05 12:50:44,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48606.0, 300 sec: 42649.6). Total num frames: 206839808. Throughput: 0: 12144.1. Samples: 1687724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:44,421][05794] Avg episode reward: [(0, '50.273')] -[2024-07-05 12:50:45,511][09119] Updated weights for policy 0, policy_version 27698 (0.0010) -[2024-07-05 12:50:47,169][09119] Updated weights for policy 0, policy_version 27708 (0.0007) -[2024-07-05 12:50:48,860][09119] Updated weights for policy 0, policy_version 27718 (0.0008) -[2024-07-05 12:50:49,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48605.9, 300 sec: 42846.7). Total num frames: 207085568. Throughput: 0: 12152.8. Samples: 1761048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:50:49,421][05794] Avg episode reward: [(0, '50.620')] -[2024-07-05 12:50:50,542][09119] Updated weights for policy 0, policy_version 27728 (0.0012) -[2024-07-05 12:50:52,238][09119] Updated weights for policy 0, policy_version 27738 (0.0010) -[2024-07-05 12:50:53,927][09119] Updated weights for policy 0, policy_version 27748 (0.0009) -[2024-07-05 12:50:54,420][05794] Fps is (10 sec: 48332.4, 60 sec: 48605.8, 300 sec: 42983.9). Total num frames: 207323136. Throughput: 0: 12149.3. Samples: 1797624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:50:54,421][05794] Avg episode reward: [(0, '53.167')] -[2024-07-05 12:50:55,652][09119] Updated weights for policy 0, policy_version 27758 (0.0007) -[2024-07-05 12:50:57,309][09119] Updated weights for policy 0, policy_version 27768 (0.0007) -[2024-07-05 12:50:59,013][09119] Updated weights for policy 0, policy_version 27778 (0.0007) -[2024-07-05 12:50:59,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48605.9, 300 sec: 43160.1). Total num frames: 207568896. Throughput: 0: 12143.4. Samples: 1870308. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:50:59,421][05794] Avg episode reward: [(0, '52.428')] -[2024-07-05 12:51:00,686][09119] Updated weights for policy 0, policy_version 27788 (0.0007) -[2024-07-05 12:51:02,339][09119] Updated weights for policy 0, policy_version 27798 (0.0008) -[2024-07-05 12:51:04,015][09119] Updated weights for policy 0, policy_version 27808 (0.0007) -[2024-07-05 12:51:04,420][05794] Fps is (10 sec: 49152.4, 60 sec: 48605.9, 300 sec: 43326.6). Total num frames: 207814656. Throughput: 0: 12144.0. Samples: 1943368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:04,421][05794] Avg episode reward: [(0, '52.409')] -[2024-07-05 12:51:05,735][09119] Updated weights for policy 0, policy_version 27818 (0.0007) -[2024-07-05 12:51:07,403][09119] Updated weights for policy 0, policy_version 27828 (0.0007) -[2024-07-05 12:51:09,087][09119] Updated weights for policy 0, policy_version 27838 (0.0008) -[2024-07-05 12:51:09,420][05794] Fps is (10 sec: 49151.9, 60 sec: 48742.5, 300 sec: 43484.0). Total num frames: 208060416. Throughput: 0: 12164.6. Samples: 1979756. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:09,421][05794] Avg episode reward: [(0, '52.550')] -[2024-07-05 12:51:10,787][09119] Updated weights for policy 0, policy_version 27848 (0.0008) -[2024-07-05 12:51:12,500][09119] Updated weights for policy 0, policy_version 27858 (0.0008) -[2024-07-05 12:51:14,195][09119] Updated weights for policy 0, policy_version 27868 (0.0008) -[2024-07-05 12:51:14,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48605.8, 300 sec: 43590.1). Total num frames: 208297984. Throughput: 0: 12155.5. Samples: 2052492. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:14,421][05794] Avg episode reward: [(0, '50.416')] -[2024-07-05 12:51:15,854][09119] Updated weights for policy 0, policy_version 27878 (0.0008) -[2024-07-05 12:51:17,504][09119] Updated weights for policy 0, policy_version 27888 (0.0008) -[2024-07-05 12:51:19,167][09119] Updated weights for policy 0, policy_version 27898 (0.0007) -[2024-07-05 12:51:19,420][05794] Fps is (10 sec: 48332.1, 60 sec: 48605.8, 300 sec: 43732.6). Total num frames: 208543744. Throughput: 0: 12157.7. Samples: 2125700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:19,421][05794] Avg episode reward: [(0, '52.327')] -[2024-07-05 12:51:20,885][09119] Updated weights for policy 0, policy_version 27908 (0.0009) -[2024-07-05 12:51:22,578][09119] Updated weights for policy 0, policy_version 27918 (0.0007) -[2024-07-05 12:51:24,260][09119] Updated weights for policy 0, policy_version 27928 (0.0007) -[2024-07-05 12:51:24,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48742.5, 300 sec: 43868.2). Total num frames: 208789504. Throughput: 0: 12160.5. Samples: 2162292. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:24,421][05794] Avg episode reward: [(0, '50.415')] -[2024-07-05 12:51:25,990][09119] Updated weights for policy 0, policy_version 27938 (0.0008) -[2024-07-05 12:51:27,676][09119] Updated weights for policy 0, policy_version 27948 (0.0008) -[2024-07-05 12:51:29,370][09119] Updated weights for policy 0, policy_version 27958 (0.0010) -[2024-07-05 12:51:29,420][05794] Fps is (10 sec: 48333.6, 60 sec: 48605.9, 300 sec: 43957.1). Total num frames: 209027072. Throughput: 0: 12164.8. Samples: 2235140. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:51:29,421][05794] Avg episode reward: [(0, '48.699')] -[2024-07-05 12:51:31,049][09119] Updated weights for policy 0, policy_version 27968 (0.0007) -[2024-07-05 12:51:32,732][09119] Updated weights for policy 0, policy_version 27978 (0.0008) -[2024-07-05 12:51:34,391][09119] Updated weights for policy 0, policy_version 27988 (0.0010) -[2024-07-05 12:51:34,420][05794] Fps is (10 sec: 48331.8, 60 sec: 48605.8, 300 sec: 44080.7). Total num frames: 209272832. Throughput: 0: 12145.5. Samples: 2307600. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:51:34,421][05794] Avg episode reward: [(0, '49.078')] -[2024-07-05 12:51:36,092][09119] Updated weights for policy 0, policy_version 27998 (0.0008) -[2024-07-05 12:51:37,776][09119] Updated weights for policy 0, policy_version 28008 (0.0007) -[2024-07-05 12:51:39,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48605.8, 300 sec: 44160.6). Total num frames: 209510400. Throughput: 0: 12137.8. Samples: 2343824. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:51:39,421][05794] Avg episode reward: [(0, '51.432')] -[2024-07-05 12:51:39,492][09119] Updated weights for policy 0, policy_version 28018 (0.0010) -[2024-07-05 12:51:41,145][09119] Updated weights for policy 0, policy_version 28028 (0.0007) -[2024-07-05 12:51:42,865][09119] Updated weights for policy 0, policy_version 28038 (0.0008) -[2024-07-05 12:51:44,420][05794] Fps is (10 sec: 48333.6, 60 sec: 48605.8, 300 sec: 44274.0). Total num frames: 209756160. Throughput: 0: 12141.1. Samples: 2416656. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:51:44,421][05794] Avg episode reward: [(0, '51.645')] -[2024-07-05 12:51:44,572][09119] Updated weights for policy 0, policy_version 28048 (0.0008) -[2024-07-05 12:51:46,219][09119] Updated weights for policy 0, policy_version 28058 (0.0008) -[2024-07-05 12:51:47,941][09119] Updated weights for policy 0, policy_version 28068 (0.0010) -[2024-07-05 12:51:49,420][05794] Fps is (10 sec: 49152.1, 60 sec: 48605.8, 300 sec: 44382.4). Total num frames: 210001920. Throughput: 0: 12141.9. Samples: 2489756. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:49,421][05794] Avg episode reward: [(0, '50.818')] -[2024-07-05 12:51:49,575][09119] Updated weights for policy 0, policy_version 28078 (0.0008) -[2024-07-05 12:51:51,212][09119] Updated weights for policy 0, policy_version 28088 (0.0010) -[2024-07-05 12:51:52,962][09119] Updated weights for policy 0, policy_version 28098 (0.0008) -[2024-07-05 12:51:54,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48606.0, 300 sec: 44450.5). Total num frames: 210239488. Throughput: 0: 12146.0. Samples: 2526324. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:54,421][05794] Avg episode reward: [(0, '52.200')] -[2024-07-05 12:51:54,464][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000028107_210247680.pth... -[2024-07-05 12:51:54,533][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000026858_200015872.pth -[2024-07-05 12:51:54,637][09119] Updated weights for policy 0, policy_version 28108 (0.0008) -[2024-07-05 12:51:56,340][09119] Updated weights for policy 0, policy_version 28118 (0.0007) -[2024-07-05 12:51:58,043][09119] Updated weights for policy 0, policy_version 28128 (0.0008) -[2024-07-05 12:51:59,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48605.9, 300 sec: 44550.5). Total num frames: 210485248. Throughput: 0: 12144.2. Samples: 2598980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:51:59,421][05794] Avg episode reward: [(0, '53.477')] -[2024-07-05 12:51:59,722][09119] Updated weights for policy 0, policy_version 28138 (0.0008) -[2024-07-05 12:52:01,406][09119] Updated weights for policy 0, policy_version 28148 (0.0007) -[2024-07-05 12:52:03,134][09119] Updated weights for policy 0, policy_version 28158 (0.0008) -[2024-07-05 12:52:04,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48469.4, 300 sec: 44612.3). Total num frames: 210722816. Throughput: 0: 12127.0. Samples: 2671412. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:52:04,421][05794] Avg episode reward: [(0, '51.885')] -[2024-07-05 12:52:04,786][09119] Updated weights for policy 0, policy_version 28168 (0.0009) -[2024-07-05 12:52:06,481][09119] Updated weights for policy 0, policy_version 28178 (0.0008) -[2024-07-05 12:52:08,156][09119] Updated weights for policy 0, policy_version 28188 (0.0008) -[2024-07-05 12:52:09,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48469.3, 300 sec: 44704.9). Total num frames: 210968576. Throughput: 0: 12120.6. Samples: 2707720. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:52:09,421][05794] Avg episode reward: [(0, '53.530')] -[2024-07-05 12:52:09,871][09119] Updated weights for policy 0, policy_version 28198 (0.0008) -[2024-07-05 12:52:11,598][09119] Updated weights for policy 0, policy_version 28208 (0.0010) -[2024-07-05 12:52:13,287][09119] Updated weights for policy 0, policy_version 28218 (0.0007) -[2024-07-05 12:52:14,420][05794] Fps is (10 sec: 49151.7, 60 sec: 48605.9, 300 sec: 44793.9). Total num frames: 211214336. Throughput: 0: 12126.3. Samples: 2780824. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:52:14,421][05794] Avg episode reward: [(0, '47.727')] -[2024-07-05 12:52:14,937][09119] Updated weights for policy 0, policy_version 28228 (0.0008) -[2024-07-05 12:52:16,587][09119] Updated weights for policy 0, policy_version 28238 (0.0008) -[2024-07-05 12:52:18,275][09119] Updated weights for policy 0, policy_version 28248 (0.0009) -[2024-07-05 12:52:19,420][05794] Fps is (10 sec: 49152.8, 60 sec: 48606.0, 300 sec: 44879.3). Total num frames: 211460096. Throughput: 0: 12139.7. Samples: 2853884. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:52:19,421][05794] Avg episode reward: [(0, '51.116')] -[2024-07-05 12:52:19,972][09119] Updated weights for policy 0, policy_version 28258 (0.0007) -[2024-07-05 12:52:21,660][09119] Updated weights for policy 0, policy_version 28268 (0.0008) -[2024-07-05 12:52:23,331][09119] Updated weights for policy 0, policy_version 28278 (0.0007) -[2024-07-05 12:52:24,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 44930.0). Total num frames: 211697664. Throughput: 0: 12147.9. Samples: 2890480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:52:24,421][05794] Avg episode reward: [(0, '50.497')] -[2024-07-05 12:52:25,023][09119] Updated weights for policy 0, policy_version 28288 (0.0007) -[2024-07-05 12:52:26,662][09119] Updated weights for policy 0, policy_version 28298 (0.0008) -[2024-07-05 12:52:28,405][09119] Updated weights for policy 0, policy_version 28308 (0.0008) -[2024-07-05 12:52:29,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48605.9, 300 sec: 45009.6). Total num frames: 211943424. Throughput: 0: 12154.1. Samples: 2963588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:52:29,421][05794] Avg episode reward: [(0, '49.612')] -[2024-07-05 12:52:30,062][09119] Updated weights for policy 0, policy_version 28318 (0.0007) -[2024-07-05 12:52:31,775][09119] Updated weights for policy 0, policy_version 28328 (0.0008) -[2024-07-05 12:52:33,445][09119] Updated weights for policy 0, policy_version 28338 (0.0007) -[2024-07-05 12:52:34,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48469.5, 300 sec: 45056.0). Total num frames: 212180992. Throughput: 0: 12148.3. Samples: 3036428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:52:34,421][05794] Avg episode reward: [(0, '51.692')] -[2024-07-05 12:52:35,120][09119] Updated weights for policy 0, policy_version 28348 (0.0010) -[2024-07-05 12:52:36,787][09119] Updated weights for policy 0, policy_version 28358 (0.0007) -[2024-07-05 12:52:38,465][09119] Updated weights for policy 0, policy_version 28368 (0.0008) -[2024-07-05 12:52:39,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48606.0, 300 sec: 45130.5). Total num frames: 212426752. Throughput: 0: 12139.8. Samples: 3072616. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:52:39,421][05794] Avg episode reward: [(0, '49.436')] -[2024-07-05 12:52:40,138][09119] Updated weights for policy 0, policy_version 28378 (0.0010) -[2024-07-05 12:52:41,828][09119] Updated weights for policy 0, policy_version 28388 (0.0007) -[2024-07-05 12:52:43,520][09119] Updated weights for policy 0, policy_version 28398 (0.0011) -[2024-07-05 12:52:44,420][05794] Fps is (10 sec: 49151.9, 60 sec: 48605.9, 300 sec: 45202.3). Total num frames: 212672512. Throughput: 0: 12153.3. Samples: 3145880. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:52:44,421][05794] Avg episode reward: [(0, '52.135')] -[2024-07-05 12:52:45,195][09119] Updated weights for policy 0, policy_version 28408 (0.0008) -[2024-07-05 12:52:46,883][09119] Updated weights for policy 0, policy_version 28418 (0.0008) -[2024-07-05 12:52:48,622][09119] Updated weights for policy 0, policy_version 28428 (0.0007) -[2024-07-05 12:52:49,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48469.4, 300 sec: 45242.8). Total num frames: 212910080. Throughput: 0: 12155.4. Samples: 3218404. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:52:49,421][05794] Avg episode reward: [(0, '49.566')] -[2024-07-05 12:52:50,269][09119] Updated weights for policy 0, policy_version 28438 (0.0008) -[2024-07-05 12:52:51,979][09119] Updated weights for policy 0, policy_version 28448 (0.0008) -[2024-07-05 12:52:53,672][09119] Updated weights for policy 0, policy_version 28458 (0.0008) -[2024-07-05 12:52:54,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48605.8, 300 sec: 45310.2). Total num frames: 213155840. Throughput: 0: 12157.5. Samples: 3254804. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:52:54,421][05794] Avg episode reward: [(0, '51.575')] -[2024-07-05 12:52:55,342][09119] Updated weights for policy 0, policy_version 28468 (0.0009) -[2024-07-05 12:52:57,031][09119] Updated weights for policy 0, policy_version 28478 (0.0007) -[2024-07-05 12:52:58,716][09119] Updated weights for policy 0, policy_version 28488 (0.0011) -[2024-07-05 12:52:59,420][05794] Fps is (10 sec: 49151.0, 60 sec: 48605.7, 300 sec: 45375.3). Total num frames: 213401600. Throughput: 0: 12154.7. Samples: 3327788. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:52:59,421][05794] Avg episode reward: [(0, '50.595')] -[2024-07-05 12:53:00,409][09119] Updated weights for policy 0, policy_version 28498 (0.0009) -[2024-07-05 12:53:02,104][09119] Updated weights for policy 0, policy_version 28508 (0.0008) -[2024-07-05 12:53:03,813][09119] Updated weights for policy 0, policy_version 28518 (0.0008) -[2024-07-05 12:53:04,420][05794] Fps is (10 sec: 48332.1, 60 sec: 48605.7, 300 sec: 46180.7). Total num frames: 213639168. Throughput: 0: 12138.3. Samples: 3400112. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:04,421][05794] Avg episode reward: [(0, '52.487')] -[2024-07-05 12:53:05,477][09119] Updated weights for policy 0, policy_version 28528 (0.0008) -[2024-07-05 12:53:07,200][09119] Updated weights for policy 0, policy_version 28538 (0.0008) -[2024-07-05 12:53:08,857][09119] Updated weights for policy 0, policy_version 28548 (0.0008) -[2024-07-05 12:53:09,420][05794] Fps is (10 sec: 48333.5, 60 sec: 48605.9, 300 sec: 46541.7). Total num frames: 213884928. Throughput: 0: 12141.6. Samples: 3436852. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:09,422][05794] Avg episode reward: [(0, '51.870')] -[2024-07-05 12:53:10,567][09119] Updated weights for policy 0, policy_version 28558 (0.0007) -[2024-07-05 12:53:12,240][09119] Updated weights for policy 0, policy_version 28568 (0.0008) -[2024-07-05 12:53:13,941][09119] Updated weights for policy 0, policy_version 28578 (0.0008) -[2024-07-05 12:53:14,420][05794] Fps is (10 sec: 48333.7, 60 sec: 48469.4, 300 sec: 46763.8). Total num frames: 214122496. Throughput: 0: 12132.0. Samples: 3509528. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:14,421][05794] Avg episode reward: [(0, '49.951')] -[2024-07-05 12:53:15,615][09119] Updated weights for policy 0, policy_version 28588 (0.0008) -[2024-07-05 12:53:17,316][09119] Updated weights for policy 0, policy_version 28598 (0.0008) -[2024-07-05 12:53:19,006][09119] Updated weights for policy 0, policy_version 28608 (0.0010) -[2024-07-05 12:53:19,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 46986.0). Total num frames: 214368256. Throughput: 0: 12132.2. Samples: 3582376. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:19,421][05794] Avg episode reward: [(0, '49.886')] -[2024-07-05 12:53:20,669][09119] Updated weights for policy 0, policy_version 28618 (0.0008) -[2024-07-05 12:53:22,339][09119] Updated weights for policy 0, policy_version 28628 (0.0008) -[2024-07-05 12:53:24,040][09119] Updated weights for policy 0, policy_version 28638 (0.0007) -[2024-07-05 12:53:24,420][05794] Fps is (10 sec: 49151.4, 60 sec: 48605.9, 300 sec: 47180.4). Total num frames: 214614016. Throughput: 0: 12142.7. Samples: 3619040. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:24,421][05794] Avg episode reward: [(0, '49.220')] -[2024-07-05 12:53:25,742][09119] Updated weights for policy 0, policy_version 28648 (0.0008) -[2024-07-05 12:53:27,427][09119] Updated weights for policy 0, policy_version 28658 (0.0007) -[2024-07-05 12:53:29,145][09119] Updated weights for policy 0, policy_version 28668 (0.0008) -[2024-07-05 12:53:29,420][05794] Fps is (10 sec: 48332.2, 60 sec: 48469.2, 300 sec: 47376.2). Total num frames: 214851584. Throughput: 0: 12127.8. Samples: 3691632. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:29,421][05794] Avg episode reward: [(0, '52.769')] -[2024-07-05 12:53:30,782][09119] Updated weights for policy 0, policy_version 28678 (0.0007) -[2024-07-05 12:53:32,444][09119] Updated weights for policy 0, policy_version 28688 (0.0007) -[2024-07-05 12:53:34,124][09119] Updated weights for policy 0, policy_version 28698 (0.0007) -[2024-07-05 12:53:34,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48605.8, 300 sec: 47569.1). Total num frames: 215097344. Throughput: 0: 12144.5. Samples: 3764908. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:34,421][05794] Avg episode reward: [(0, '52.443')] -[2024-07-05 12:53:35,832][09119] Updated weights for policy 0, policy_version 28708 (0.0008) -[2024-07-05 12:53:37,499][09119] Updated weights for policy 0, policy_version 28718 (0.0008) -[2024-07-05 12:53:39,193][09119] Updated weights for policy 0, policy_version 28728 (0.0008) -[2024-07-05 12:53:39,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48605.8, 300 sec: 47763.6). Total num frames: 215343104. Throughput: 0: 12149.2. Samples: 3801520. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:53:39,421][05794] Avg episode reward: [(0, '50.979')] -[2024-07-05 12:53:40,855][09119] Updated weights for policy 0, policy_version 28738 (0.0010) -[2024-07-05 12:53:42,523][09119] Updated weights for policy 0, policy_version 28748 (0.0009) -[2024-07-05 12:53:44,215][09119] Updated weights for policy 0, policy_version 28758 (0.0008) -[2024-07-05 12:53:44,420][05794] Fps is (10 sec: 49152.4, 60 sec: 48605.8, 300 sec: 47986.7). Total num frames: 215588864. Throughput: 0: 12158.4. Samples: 3874916. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:53:44,421][05794] Avg episode reward: [(0, '52.467')] -[2024-07-05 12:53:45,926][09119] Updated weights for policy 0, policy_version 28768 (0.0013) -[2024-07-05 12:53:47,624][09119] Updated weights for policy 0, policy_version 28778 (0.0008) -[2024-07-05 12:53:49,316][09119] Updated weights for policy 0, policy_version 28788 (0.0008) -[2024-07-05 12:53:49,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48605.8, 300 sec: 48152.3). Total num frames: 215826432. Throughput: 0: 12166.5. Samples: 3947604. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:53:49,421][05794] Avg episode reward: [(0, '51.247')] -[2024-07-05 12:53:50,986][09119] Updated weights for policy 0, policy_version 28798 (0.0008) -[2024-07-05 12:53:52,643][09119] Updated weights for policy 0, policy_version 28808 (0.0009) -[2024-07-05 12:53:54,346][09119] Updated weights for policy 0, policy_version 28818 (0.0008) -[2024-07-05 12:53:54,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48605.8, 300 sec: 48346.7). Total num frames: 216072192. Throughput: 0: 12154.5. Samples: 3983804. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 12:53:54,421][05794] Avg episode reward: [(0, '52.807')] -[2024-07-05 12:53:54,425][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000028818_216072192.pth... -[2024-07-05 12:53:54,496][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000027395_204414976.pth -[2024-07-05 12:53:56,023][09119] Updated weights for policy 0, policy_version 28828 (0.0010) -[2024-07-05 12:53:57,736][09119] Updated weights for policy 0, policy_version 28838 (0.0008) -[2024-07-05 12:53:59,398][09119] Updated weights for policy 0, policy_version 28848 (0.0008) -[2024-07-05 12:53:59,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48606.0, 300 sec: 48541.1). Total num frames: 216317952. Throughput: 0: 12156.2. Samples: 4056556. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:53:59,421][05794] Avg episode reward: [(0, '51.320')] -[2024-07-05 12:54:01,142][09119] Updated weights for policy 0, policy_version 28858 (0.0009) -[2024-07-05 12:54:02,789][09119] Updated weights for policy 0, policy_version 28868 (0.0008) -[2024-07-05 12:54:04,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48605.9, 300 sec: 48596.6). Total num frames: 216555520. Throughput: 0: 12149.8. Samples: 4129120. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:04,421][05794] Avg episode reward: [(0, '53.621')] -[2024-07-05 12:54:04,504][09119] Updated weights for policy 0, policy_version 28878 (0.0008) -[2024-07-05 12:54:06,204][09119] Updated weights for policy 0, policy_version 28888 (0.0008) -[2024-07-05 12:54:07,938][09119] Updated weights for policy 0, policy_version 28898 (0.0010) -[2024-07-05 12:54:09,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48605.9, 300 sec: 48596.6). Total num frames: 216801280. Throughput: 0: 12143.8. Samples: 4165508. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:09,421][05794] Avg episode reward: [(0, '51.714')] -[2024-07-05 12:54:09,620][09119] Updated weights for policy 0, policy_version 28908 (0.0007) -[2024-07-05 12:54:11,308][09119] Updated weights for policy 0, policy_version 28918 (0.0008) -[2024-07-05 12:54:12,958][09119] Updated weights for policy 0, policy_version 28928 (0.0009) -[2024-07-05 12:54:14,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48605.7, 300 sec: 48596.6). Total num frames: 217038848. Throughput: 0: 12139.4. Samples: 4237904. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:14,421][05794] Avg episode reward: [(0, '51.966')] -[2024-07-05 12:54:14,673][09119] Updated weights for policy 0, policy_version 28938 (0.0007) -[2024-07-05 12:54:16,347][09119] Updated weights for policy 0, policy_version 28948 (0.0008) -[2024-07-05 12:54:18,063][09119] Updated weights for policy 0, policy_version 28958 (0.0007) -[2024-07-05 12:54:19,420][05794] Fps is (10 sec: 47513.3, 60 sec: 48469.2, 300 sec: 48568.8). Total num frames: 217276416. Throughput: 0: 12128.0. Samples: 4310668. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:19,421][05794] Avg episode reward: [(0, '50.995')] -[2024-07-05 12:54:19,745][09119] Updated weights for policy 0, policy_version 28968 (0.0010) -[2024-07-05 12:54:21,478][09119] Updated weights for policy 0, policy_version 28978 (0.0010) -[2024-07-05 12:54:23,139][09119] Updated weights for policy 0, policy_version 28988 (0.0008) -[2024-07-05 12:54:24,420][05794] Fps is (10 sec: 48332.2, 60 sec: 48469.2, 300 sec: 48568.8). Total num frames: 217522176. Throughput: 0: 12116.5. Samples: 4346768. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:24,421][05794] Avg episode reward: [(0, '49.594')] -[2024-07-05 12:54:24,814][09119] Updated weights for policy 0, policy_version 28998 (0.0009) -[2024-07-05 12:54:26,508][09119] Updated weights for policy 0, policy_version 29008 (0.0007) -[2024-07-05 12:54:28,209][09119] Updated weights for policy 0, policy_version 29018 (0.0010) -[2024-07-05 12:54:29,420][05794] Fps is (10 sec: 49151.9, 60 sec: 48605.9, 300 sec: 48596.6). Total num frames: 217767936. Throughput: 0: 12111.5. Samples: 4419936. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:29,422][05794] Avg episode reward: [(0, '49.560')] -[2024-07-05 12:54:29,890][09119] Updated weights for policy 0, policy_version 29028 (0.0008) -[2024-07-05 12:54:31,568][09119] Updated weights for policy 0, policy_version 29038 (0.0007) -[2024-07-05 12:54:33,256][09119] Updated weights for policy 0, policy_version 29048 (0.0008) -[2024-07-05 12:54:34,420][05794] Fps is (10 sec: 48334.5, 60 sec: 48469.5, 300 sec: 48568.9). Total num frames: 218005504. Throughput: 0: 12102.0. Samples: 4492192. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:54:34,421][05794] Avg episode reward: [(0, '53.601')] -[2024-07-05 12:54:34,935][09119] Updated weights for policy 0, policy_version 29058 (0.0008) -[2024-07-05 12:54:36,620][09119] Updated weights for policy 0, policy_version 29068 (0.0008) -[2024-07-05 12:54:38,330][09119] Updated weights for policy 0, policy_version 29078 (0.0007) -[2024-07-05 12:54:39,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48568.9). Total num frames: 218251264. Throughput: 0: 12108.5. Samples: 4528688. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:54:39,421][05794] Avg episode reward: [(0, '49.394')] -[2024-07-05 12:54:40,020][09119] Updated weights for policy 0, policy_version 29088 (0.0011) -[2024-07-05 12:54:41,711][09119] Updated weights for policy 0, policy_version 29098 (0.0008) -[2024-07-05 12:54:43,396][09119] Updated weights for policy 0, policy_version 29108 (0.0010) -[2024-07-05 12:54:44,420][05794] Fps is (10 sec: 49151.6, 60 sec: 48469.3, 300 sec: 48568.8). Total num frames: 218497024. Throughput: 0: 12117.3. Samples: 4601836. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:54:44,421][05794] Avg episode reward: [(0, '53.410')] -[2024-07-05 12:54:45,078][09119] Updated weights for policy 0, policy_version 29118 (0.0008) -[2024-07-05 12:54:46,766][09119] Updated weights for policy 0, policy_version 29128 (0.0008) -[2024-07-05 12:54:48,424][09119] Updated weights for policy 0, policy_version 29138 (0.0007) -[2024-07-05 12:54:49,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48568.8). Total num frames: 218734592. Throughput: 0: 12119.4. Samples: 4674492. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:54:49,421][05794] Avg episode reward: [(0, '51.384')] -[2024-07-05 12:54:50,156][09119] Updated weights for policy 0, policy_version 29148 (0.0008) -[2024-07-05 12:54:51,843][09119] Updated weights for policy 0, policy_version 29158 (0.0010) -[2024-07-05 12:54:53,561][09119] Updated weights for policy 0, policy_version 29168 (0.0008) -[2024-07-05 12:54:54,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 48568.8). Total num frames: 218980352. Throughput: 0: 12114.9. Samples: 4710680. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:54:54,421][05794] Avg episode reward: [(0, '49.480')] -[2024-07-05 12:54:55,216][09119] Updated weights for policy 0, policy_version 29178 (0.0010) -[2024-07-05 12:54:56,852][09119] Updated weights for policy 0, policy_version 29188 (0.0008) -[2024-07-05 12:54:58,510][09119] Updated weights for policy 0, policy_version 29198 (0.0010) -[2024-07-05 12:54:59,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48332.7, 300 sec: 48541.1). Total num frames: 219217920. Throughput: 0: 12134.1. Samples: 4783936. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:54:59,422][05794] Avg episode reward: [(0, '51.365')] -[2024-07-05 12:55:00,189][09119] Updated weights for policy 0, policy_version 29208 (0.0007) -[2024-07-05 12:55:01,927][09119] Updated weights for policy 0, policy_version 29218 (0.0008) -[2024-07-05 12:55:03,622][09119] Updated weights for policy 0, policy_version 29228 (0.0011) -[2024-07-05 12:55:04,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48469.5, 300 sec: 48568.9). Total num frames: 219463680. Throughput: 0: 12132.2. Samples: 4856616. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:55:04,421][05794] Avg episode reward: [(0, '50.050')] -[2024-07-05 12:55:05,294][09119] Updated weights for policy 0, policy_version 29238 (0.0009) -[2024-07-05 12:55:06,960][09119] Updated weights for policy 0, policy_version 29248 (0.0009) -[2024-07-05 12:55:08,670][09119] Updated weights for policy 0, policy_version 29258 (0.0007) -[2024-07-05 12:55:09,420][05794] Fps is (10 sec: 49152.2, 60 sec: 48469.3, 300 sec: 48568.8). Total num frames: 219709440. Throughput: 0: 12141.1. Samples: 4893116. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 12:55:09,421][05794] Avg episode reward: [(0, '53.212')] -[2024-07-05 12:55:10,367][09119] Updated weights for policy 0, policy_version 29268 (0.0008) -[2024-07-05 12:55:12,076][09119] Updated weights for policy 0, policy_version 29278 (0.0008) -[2024-07-05 12:55:13,766][09119] Updated weights for policy 0, policy_version 29288 (0.0008) -[2024-07-05 12:55:14,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 219947008. Throughput: 0: 12131.4. Samples: 4965848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:55:14,421][05794] Avg episode reward: [(0, '50.396')] -[2024-07-05 12:55:15,462][09119] Updated weights for policy 0, policy_version 29298 (0.0010) -[2024-07-05 12:55:17,160][09119] Updated weights for policy 0, policy_version 29308 (0.0007) -[2024-07-05 12:55:18,833][09119] Updated weights for policy 0, policy_version 29318 (0.0007) -[2024-07-05 12:55:19,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48605.9, 300 sec: 48568.9). Total num frames: 220192768. Throughput: 0: 12137.4. Samples: 5038376. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:55:19,421][05794] Avg episode reward: [(0, '54.605')] -[2024-07-05 12:55:20,522][09119] Updated weights for policy 0, policy_version 29328 (0.0008) -[2024-07-05 12:55:22,199][09119] Updated weights for policy 0, policy_version 29338 (0.0008) -[2024-07-05 12:55:23,844][09119] Updated weights for policy 0, policy_version 29348 (0.0007) -[2024-07-05 12:55:24,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48606.1, 300 sec: 48568.9). Total num frames: 220438528. Throughput: 0: 12141.0. Samples: 5075032. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:55:24,421][05794] Avg episode reward: [(0, '52.513')] -[2024-07-05 12:55:25,575][09119] Updated weights for policy 0, policy_version 29358 (0.0007) -[2024-07-05 12:55:27,216][09119] Updated weights for policy 0, policy_version 29368 (0.0008) -[2024-07-05 12:55:28,945][09119] Updated weights for policy 0, policy_version 29378 (0.0007) -[2024-07-05 12:55:29,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 220676096. Throughput: 0: 12136.6. Samples: 5147984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:55:29,421][05794] Avg episode reward: [(0, '52.400')] -[2024-07-05 12:55:30,638][09119] Updated weights for policy 0, policy_version 29388 (0.0008) -[2024-07-05 12:55:32,323][09119] Updated weights for policy 0, policy_version 29398 (0.0008) -[2024-07-05 12:55:33,999][09119] Updated weights for policy 0, policy_version 29408 (0.0010) -[2024-07-05 12:55:34,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48605.8, 300 sec: 48568.8). Total num frames: 220921856. Throughput: 0: 12133.6. Samples: 5220504. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:55:34,421][05794] Avg episode reward: [(0, '50.932')] -[2024-07-05 12:55:35,723][09119] Updated weights for policy 0, policy_version 29418 (0.0007) -[2024-07-05 12:55:37,392][09119] Updated weights for policy 0, policy_version 29428 (0.0010) -[2024-07-05 12:55:39,059][09119] Updated weights for policy 0, policy_version 29438 (0.0007) -[2024-07-05 12:55:39,420][05794] Fps is (10 sec: 49151.2, 60 sec: 48605.8, 300 sec: 48568.8). Total num frames: 221167616. Throughput: 0: 12142.1. Samples: 5257076. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:55:39,422][05794] Avg episode reward: [(0, '50.432')] -[2024-07-05 12:55:40,767][09119] Updated weights for policy 0, policy_version 29448 (0.0010) -[2024-07-05 12:55:42,490][09119] Updated weights for policy 0, policy_version 29458 (0.0011) -[2024-07-05 12:55:44,162][09119] Updated weights for policy 0, policy_version 29468 (0.0007) -[2024-07-05 12:55:44,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 221405184. Throughput: 0: 12126.3. Samples: 5329620. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:55:44,421][05794] Avg episode reward: [(0, '51.196')] -[2024-07-05 12:55:45,828][09119] Updated weights for policy 0, policy_version 29478 (0.0008) -[2024-07-05 12:55:47,523][09119] Updated weights for policy 0, policy_version 29488 (0.0007) -[2024-07-05 12:55:49,206][09119] Updated weights for policy 0, policy_version 29498 (0.0008) -[2024-07-05 12:55:49,420][05794] Fps is (10 sec: 48332.0, 60 sec: 48605.6, 300 sec: 48568.8). Total num frames: 221650944. Throughput: 0: 12135.0. Samples: 5402696. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:55:49,421][05794] Avg episode reward: [(0, '50.992')] -[2024-07-05 12:55:50,916][09119] Updated weights for policy 0, policy_version 29508 (0.0008) -[2024-07-05 12:55:52,572][09119] Updated weights for policy 0, policy_version 29518 (0.0008) -[2024-07-05 12:55:54,266][09119] Updated weights for policy 0, policy_version 29528 (0.0008) -[2024-07-05 12:55:54,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 221888512. Throughput: 0: 12132.6. Samples: 5439084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:55:54,421][05794] Avg episode reward: [(0, '48.587')] -[2024-07-05 12:55:54,425][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000029529_221896704.pth... -[2024-07-05 12:55:54,492][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000028107_210247680.pth -[2024-07-05 12:55:55,955][09119] Updated weights for policy 0, policy_version 29538 (0.0007) -[2024-07-05 12:55:57,643][09119] Updated weights for policy 0, policy_version 29548 (0.0008) -[2024-07-05 12:55:59,341][09119] Updated weights for policy 0, policy_version 29558 (0.0007) -[2024-07-05 12:55:59,420][05794] Fps is (10 sec: 48334.2, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 222134272. Throughput: 0: 12118.8. Samples: 5511192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:55:59,421][05794] Avg episode reward: [(0, '51.193')] -[2024-07-05 12:56:01,035][09119] Updated weights for policy 0, policy_version 29568 (0.0007) -[2024-07-05 12:56:02,709][09119] Updated weights for policy 0, policy_version 29578 (0.0009) -[2024-07-05 12:56:04,420][05794] Fps is (10 sec: 48332.4, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 222371840. Throughput: 0: 12127.0. Samples: 5584092. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:56:04,421][05794] Avg episode reward: [(0, '48.853')] -[2024-07-05 12:56:04,427][09119] Updated weights for policy 0, policy_version 29588 (0.0008) -[2024-07-05 12:56:06,122][09119] Updated weights for policy 0, policy_version 29598 (0.0011) -[2024-07-05 12:56:07,826][09119] Updated weights for policy 0, policy_version 29608 (0.0008) -[2024-07-05 12:56:09,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 222617600. Throughput: 0: 12115.3. Samples: 5620220. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:56:09,421][05794] Avg episode reward: [(0, '51.042')] -[2024-07-05 12:56:09,529][09119] Updated weights for policy 0, policy_version 29618 (0.0008) -[2024-07-05 12:56:11,218][09119] Updated weights for policy 0, policy_version 29628 (0.0008) -[2024-07-05 12:56:12,890][09119] Updated weights for policy 0, policy_version 29638 (0.0009) -[2024-07-05 12:56:14,420][05794] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 222863360. Throughput: 0: 12118.9. Samples: 5693336. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:14,421][05794] Avg episode reward: [(0, '47.487')] -[2024-07-05 12:56:14,531][09119] Updated weights for policy 0, policy_version 29648 (0.0008) -[2024-07-05 12:56:16,228][09119] Updated weights for policy 0, policy_version 29658 (0.0008) -[2024-07-05 12:56:17,945][09119] Updated weights for policy 0, policy_version 29668 (0.0010) -[2024-07-05 12:56:19,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 223100928. Throughput: 0: 12118.8. Samples: 5765848. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:19,421][05794] Avg episode reward: [(0, '49.437')] -[2024-07-05 12:56:19,654][09119] Updated weights for policy 0, policy_version 29678 (0.0008) -[2024-07-05 12:56:21,342][09119] Updated weights for policy 0, policy_version 29688 (0.0011) -[2024-07-05 12:56:23,022][09119] Updated weights for policy 0, policy_version 29698 (0.0007) -[2024-07-05 12:56:24,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 223346688. Throughput: 0: 12113.9. Samples: 5802200. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:24,421][05794] Avg episode reward: [(0, '50.719')] -[2024-07-05 12:56:24,746][09119] Updated weights for policy 0, policy_version 29708 (0.0008) -[2024-07-05 12:56:26,424][09119] Updated weights for policy 0, policy_version 29718 (0.0009) -[2024-07-05 12:56:28,127][09119] Updated weights for policy 0, policy_version 29728 (0.0007) -[2024-07-05 12:56:29,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 223584256. Throughput: 0: 12119.0. Samples: 5874976. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:29,421][05794] Avg episode reward: [(0, '47.288')] -[2024-07-05 12:56:29,776][09119] Updated weights for policy 0, policy_version 29738 (0.0007) -[2024-07-05 12:56:31,468][09119] Updated weights for policy 0, policy_version 29748 (0.0007) -[2024-07-05 12:56:33,184][09119] Updated weights for policy 0, policy_version 29758 (0.0008) -[2024-07-05 12:56:34,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 223830016. Throughput: 0: 12107.6. Samples: 5947532. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:34,421][05794] Avg episode reward: [(0, '53.914')] -[2024-07-05 12:56:34,860][09119] Updated weights for policy 0, policy_version 29768 (0.0007) -[2024-07-05 12:56:36,572][09119] Updated weights for policy 0, policy_version 29778 (0.0007) -[2024-07-05 12:56:38,281][09119] Updated weights for policy 0, policy_version 29788 (0.0008) -[2024-07-05 12:56:39,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48333.0, 300 sec: 48513.3). Total num frames: 224067584. Throughput: 0: 12096.7. Samples: 5983436. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:39,421][05794] Avg episode reward: [(0, '50.585')] -[2024-07-05 12:56:39,957][09119] Updated weights for policy 0, policy_version 29798 (0.0007) -[2024-07-05 12:56:41,657][09119] Updated weights for policy 0, policy_version 29808 (0.0007) -[2024-07-05 12:56:43,367][09119] Updated weights for policy 0, policy_version 29818 (0.0007) -[2024-07-05 12:56:44,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 224313344. Throughput: 0: 12117.8. Samples: 6056492. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:44,421][05794] Avg episode reward: [(0, '49.564')] -[2024-07-05 12:56:45,043][09119] Updated weights for policy 0, policy_version 29828 (0.0009) -[2024-07-05 12:56:46,721][09119] Updated weights for policy 0, policy_version 29838 (0.0008) -[2024-07-05 12:56:48,431][09119] Updated weights for policy 0, policy_version 29848 (0.0008) -[2024-07-05 12:56:49,420][05794] Fps is (10 sec: 49151.3, 60 sec: 48469.6, 300 sec: 48541.1). Total num frames: 224559104. Throughput: 0: 12107.7. Samples: 6128936. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:56:49,422][05794] Avg episode reward: [(0, '46.381')] -[2024-07-05 12:56:50,072][09119] Updated weights for policy 0, policy_version 29858 (0.0008) -[2024-07-05 12:56:51,792][09119] Updated weights for policy 0, policy_version 29868 (0.0007) -[2024-07-05 12:56:53,477][09119] Updated weights for policy 0, policy_version 29878 (0.0007) -[2024-07-05 12:56:54,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 224796672. Throughput: 0: 12112.2. Samples: 6165268. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:56:54,421][05794] Avg episode reward: [(0, '50.686')] -[2024-07-05 12:56:55,135][09119] Updated weights for policy 0, policy_version 29888 (0.0007) -[2024-07-05 12:56:56,824][09119] Updated weights for policy 0, policy_version 29898 (0.0007) -[2024-07-05 12:56:58,511][09119] Updated weights for policy 0, policy_version 29908 (0.0007) -[2024-07-05 12:56:59,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 225042432. Throughput: 0: 12112.6. Samples: 6238404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:56:59,421][05794] Avg episode reward: [(0, '50.153')] -[2024-07-05 12:57:00,177][09119] Updated weights for policy 0, policy_version 29918 (0.0009) -[2024-07-05 12:57:01,863][09119] Updated weights for policy 0, policy_version 29928 (0.0008) -[2024-07-05 12:57:03,564][09119] Updated weights for policy 0, policy_version 29938 (0.0008) -[2024-07-05 12:57:04,420][05794] Fps is (10 sec: 49151.4, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 225288192. Throughput: 0: 12126.1. Samples: 6311524. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:04,421][05794] Avg episode reward: [(0, '49.873')] -[2024-07-05 12:57:05,248][09119] Updated weights for policy 0, policy_version 29948 (0.0010) -[2024-07-05 12:57:06,951][09119] Updated weights for policy 0, policy_version 29958 (0.0008) -[2024-07-05 12:57:08,647][09119] Updated weights for policy 0, policy_version 29968 (0.0007) -[2024-07-05 12:57:09,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 225525760. Throughput: 0: 12122.5. Samples: 6347712. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:09,421][05794] Avg episode reward: [(0, '53.014')] -[2024-07-05 12:57:10,339][09119] Updated weights for policy 0, policy_version 29978 (0.0008) -[2024-07-05 12:57:12,029][09119] Updated weights for policy 0, policy_version 29988 (0.0007) -[2024-07-05 12:57:13,695][09119] Updated weights for policy 0, policy_version 29998 (0.0007) -[2024-07-05 12:57:14,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 225771520. Throughput: 0: 12123.3. Samples: 6420524. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:57:14,422][05794] Avg episode reward: [(0, '48.261')] -[2024-07-05 12:57:15,392][09119] Updated weights for policy 0, policy_version 30008 (0.0007) -[2024-07-05 12:57:17,102][09119] Updated weights for policy 0, policy_version 30018 (0.0008) -[2024-07-05 12:57:18,799][09119] Updated weights for policy 0, policy_version 30028 (0.0007) -[2024-07-05 12:57:19,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 226009088. Throughput: 0: 12118.9. Samples: 6492884. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:57:19,421][05794] Avg episode reward: [(0, '50.305')] -[2024-07-05 12:57:20,489][09119] Updated weights for policy 0, policy_version 30038 (0.0008) -[2024-07-05 12:57:22,186][09119] Updated weights for policy 0, policy_version 30048 (0.0008) -[2024-07-05 12:57:23,885][09119] Updated weights for policy 0, policy_version 30058 (0.0009) -[2024-07-05 12:57:24,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48469.2, 300 sec: 48513.3). Total num frames: 226254848. Throughput: 0: 12127.4. Samples: 6529172. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:57:24,422][05794] Avg episode reward: [(0, '50.911')] -[2024-07-05 12:57:25,585][09119] Updated weights for policy 0, policy_version 30068 (0.0010) -[2024-07-05 12:57:27,254][09119] Updated weights for policy 0, policy_version 30078 (0.0008) -[2024-07-05 12:57:28,989][09119] Updated weights for policy 0, policy_version 30088 (0.0007) -[2024-07-05 12:57:29,420][05794] Fps is (10 sec: 48332.5, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 226492416. Throughput: 0: 12115.1. Samples: 6601672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 12:57:29,421][05794] Avg episode reward: [(0, '53.838')] -[2024-07-05 12:57:30,670][09119] Updated weights for policy 0, policy_version 30098 (0.0007) -[2024-07-05 12:57:32,344][09119] Updated weights for policy 0, policy_version 30108 (0.0007) -[2024-07-05 12:57:34,030][09119] Updated weights for policy 0, policy_version 30118 (0.0008) -[2024-07-05 12:57:34,420][05794] Fps is (10 sec: 48333.8, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 226738176. Throughput: 0: 12127.6. Samples: 6674680. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:34,421][05794] Avg episode reward: [(0, '51.364')] -[2024-07-05 12:57:35,694][09119] Updated weights for policy 0, policy_version 30128 (0.0010) -[2024-07-05 12:57:37,340][09119] Updated weights for policy 0, policy_version 30138 (0.0008) -[2024-07-05 12:57:39,051][09119] Updated weights for policy 0, policy_version 30148 (0.0008) -[2024-07-05 12:57:39,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48605.8, 300 sec: 48513.3). Total num frames: 226983936. Throughput: 0: 12131.2. Samples: 6711172. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:39,421][05794] Avg episode reward: [(0, '48.222')] -[2024-07-05 12:57:40,734][09119] Updated weights for policy 0, policy_version 30158 (0.0008) -[2024-07-05 12:57:42,437][09119] Updated weights for policy 0, policy_version 30168 (0.0007) -[2024-07-05 12:57:44,130][09119] Updated weights for policy 0, policy_version 30178 (0.0008) -[2024-07-05 12:57:44,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 227221504. Throughput: 0: 12122.7. Samples: 6783928. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:44,421][05794] Avg episode reward: [(0, '52.136')] -[2024-07-05 12:57:45,799][09119] Updated weights for policy 0, policy_version 30188 (0.0008) -[2024-07-05 12:57:47,491][09119] Updated weights for policy 0, policy_version 30198 (0.0007) -[2024-07-05 12:57:49,214][09119] Updated weights for policy 0, policy_version 30208 (0.0009) -[2024-07-05 12:57:49,420][05794] Fps is (10 sec: 48332.2, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 227467264. Throughput: 0: 12116.6. Samples: 6856772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:49,421][05794] Avg episode reward: [(0, '50.374')] -[2024-07-05 12:57:50,875][09119] Updated weights for policy 0, policy_version 30218 (0.0009) -[2024-07-05 12:57:52,546][09119] Updated weights for policy 0, policy_version 30228 (0.0008) -[2024-07-05 12:57:54,236][09119] Updated weights for policy 0, policy_version 30238 (0.0010) -[2024-07-05 12:57:54,420][05794] Fps is (10 sec: 48333.4, 60 sec: 48469.4, 300 sec: 48485.6). Total num frames: 227704832. Throughput: 0: 12123.8. Samples: 6893280. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:54,421][05794] Avg episode reward: [(0, '50.797')] -[2024-07-05 12:57:54,452][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000030239_227713024.pth... -[2024-07-05 12:57:54,516][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000028818_216072192.pth -[2024-07-05 12:57:55,924][09119] Updated weights for policy 0, policy_version 30248 (0.0008) -[2024-07-05 12:57:57,613][09119] Updated weights for policy 0, policy_version 30258 (0.0008) -[2024-07-05 12:57:59,318][09119] Updated weights for policy 0, policy_version 30268 (0.0008) -[2024-07-05 12:57:59,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 227950592. Throughput: 0: 12109.2. Samples: 6965440. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:57:59,421][05794] Avg episode reward: [(0, '52.585')] -[2024-07-05 12:58:01,032][09119] Updated weights for policy 0, policy_version 30278 (0.0007) -[2024-07-05 12:58:02,760][09119] Updated weights for policy 0, policy_version 30288 (0.0011) -[2024-07-05 12:58:04,411][09119] Updated weights for policy 0, policy_version 30298 (0.0010) -[2024-07-05 12:58:04,420][05794] Fps is (10 sec: 49151.7, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 228196352. Throughput: 0: 12126.0. Samples: 7038552. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:04,421][05794] Avg episode reward: [(0, '51.961')] -[2024-07-05 12:58:06,104][09119] Updated weights for policy 0, policy_version 30308 (0.0007) -[2024-07-05 12:58:07,740][09119] Updated weights for policy 0, policy_version 30318 (0.0008) -[2024-07-05 12:58:09,404][09119] Updated weights for policy 0, policy_version 30328 (0.0007) -[2024-07-05 12:58:09,420][05794] Fps is (10 sec: 49152.4, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 228442112. Throughput: 0: 12132.4. Samples: 7075128. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:09,421][05794] Avg episode reward: [(0, '52.741')] -[2024-07-05 12:58:11,127][09119] Updated weights for policy 0, policy_version 30338 (0.0010) -[2024-07-05 12:58:12,786][09119] Updated weights for policy 0, policy_version 30348 (0.0008) -[2024-07-05 12:58:14,420][05794] Fps is (10 sec: 48331.9, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 228679680. Throughput: 0: 12142.4. Samples: 7148080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:14,422][05794] Avg episode reward: [(0, '53.388')] -[2024-07-05 12:58:14,481][09119] Updated weights for policy 0, policy_version 30358 (0.0007) -[2024-07-05 12:58:16,236][09119] Updated weights for policy 0, policy_version 30368 (0.0007) -[2024-07-05 12:58:17,910][09119] Updated weights for policy 0, policy_version 30378 (0.0008) -[2024-07-05 12:58:19,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 228925440. Throughput: 0: 12138.8. Samples: 7220924. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:19,421][05794] Avg episode reward: [(0, '50.484')] -[2024-07-05 12:58:19,614][09119] Updated weights for policy 0, policy_version 30388 (0.0008) -[2024-07-05 12:58:21,311][09119] Updated weights for policy 0, policy_version 30398 (0.0008) -[2024-07-05 12:58:22,992][09119] Updated weights for policy 0, policy_version 30408 (0.0007) -[2024-07-05 12:58:24,420][05794] Fps is (10 sec: 48333.7, 60 sec: 48469.5, 300 sec: 48513.3). Total num frames: 229163008. Throughput: 0: 12130.9. Samples: 7257064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:24,421][05794] Avg episode reward: [(0, '54.475')] -[2024-07-05 12:58:24,678][09119] Updated weights for policy 0, policy_version 30418 (0.0010) -[2024-07-05 12:58:26,350][09119] Updated weights for policy 0, policy_version 30428 (0.0007) -[2024-07-05 12:58:28,049][09119] Updated weights for policy 0, policy_version 30438 (0.0007) -[2024-07-05 12:58:29,420][05794] Fps is (10 sec: 48331.8, 60 sec: 48605.7, 300 sec: 48513.3). Total num frames: 229408768. Throughput: 0: 12120.8. Samples: 7329368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:29,421][05794] Avg episode reward: [(0, '53.466')] -[2024-07-05 12:58:29,750][09119] Updated weights for policy 0, policy_version 30448 (0.0007) -[2024-07-05 12:58:31,419][09119] Updated weights for policy 0, policy_version 30458 (0.0008) -[2024-07-05 12:58:33,120][09119] Updated weights for policy 0, policy_version 30468 (0.0007) -[2024-07-05 12:58:34,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48485.5). Total num frames: 229646336. Throughput: 0: 12118.5. Samples: 7402104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:34,421][05794] Avg episode reward: [(0, '50.805')] -[2024-07-05 12:58:34,816][09119] Updated weights for policy 0, policy_version 30478 (0.0008) -[2024-07-05 12:58:36,517][09119] Updated weights for policy 0, policy_version 30488 (0.0010) -[2024-07-05 12:58:38,196][09119] Updated weights for policy 0, policy_version 30498 (0.0007) -[2024-07-05 12:58:39,420][05794] Fps is (10 sec: 48333.9, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 229892096. Throughput: 0: 12116.6. Samples: 7438528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:39,421][05794] Avg episode reward: [(0, '53.290')] -[2024-07-05 12:58:39,918][09119] Updated weights for policy 0, policy_version 30508 (0.0008) -[2024-07-05 12:58:41,623][09119] Updated weights for policy 0, policy_version 30518 (0.0010) -[2024-07-05 12:58:43,315][09119] Updated weights for policy 0, policy_version 30528 (0.0008) -[2024-07-05 12:58:44,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 230129664. Throughput: 0: 12123.6. Samples: 7511000. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:58:44,421][05794] Avg episode reward: [(0, '52.041')] -[2024-07-05 12:58:44,968][09119] Updated weights for policy 0, policy_version 30538 (0.0008) -[2024-07-05 12:58:46,670][09119] Updated weights for policy 0, policy_version 30548 (0.0008) -[2024-07-05 12:58:48,335][09119] Updated weights for policy 0, policy_version 30558 (0.0007) -[2024-07-05 12:58:49,420][05794] Fps is (10 sec: 48332.0, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 230375424. Throughput: 0: 12115.5. Samples: 7583752. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:58:49,421][05794] Avg episode reward: [(0, '52.253')] -[2024-07-05 12:58:49,998][09119] Updated weights for policy 0, policy_version 30568 (0.0008) -[2024-07-05 12:58:51,708][09119] Updated weights for policy 0, policy_version 30578 (0.0008) -[2024-07-05 12:58:53,412][09119] Updated weights for policy 0, policy_version 30588 (0.0008) -[2024-07-05 12:58:54,420][05794] Fps is (10 sec: 49152.3, 60 sec: 48605.8, 300 sec: 48485.5). Total num frames: 230621184. Throughput: 0: 12121.3. Samples: 7620584. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:58:54,421][05794] Avg episode reward: [(0, '49.832')] -[2024-07-05 12:58:55,100][09119] Updated weights for policy 0, policy_version 30598 (0.0008) -[2024-07-05 12:58:56,789][09119] Updated weights for policy 0, policy_version 30608 (0.0007) -[2024-07-05 12:58:58,480][09119] Updated weights for policy 0, policy_version 30618 (0.0007) -[2024-07-05 12:58:59,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48469.2, 300 sec: 48485.5). Total num frames: 230858752. Throughput: 0: 12106.8. Samples: 7692888. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:58:59,421][05794] Avg episode reward: [(0, '50.529')] -[2024-07-05 12:59:00,153][09119] Updated weights for policy 0, policy_version 30628 (0.0007) -[2024-07-05 12:59:01,862][09119] Updated weights for policy 0, policy_version 30638 (0.0007) -[2024-07-05 12:59:03,563][09119] Updated weights for policy 0, policy_version 30648 (0.0007) -[2024-07-05 12:59:04,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 231104512. Throughput: 0: 12098.3. Samples: 7765348. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:59:04,421][05794] Avg episode reward: [(0, '52.793')] -[2024-07-05 12:59:05,255][09119] Updated weights for policy 0, policy_version 30658 (0.0009) -[2024-07-05 12:59:06,944][09119] Updated weights for policy 0, policy_version 30668 (0.0008) -[2024-07-05 12:59:08,638][09119] Updated weights for policy 0, policy_version 30678 (0.0008) -[2024-07-05 12:59:09,420][05794] Fps is (10 sec: 48334.1, 60 sec: 48332.8, 300 sec: 48485.6). Total num frames: 231342080. Throughput: 0: 12095.4. Samples: 7801356. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:59:09,421][05794] Avg episode reward: [(0, '52.274')] -[2024-07-05 12:59:10,330][09119] Updated weights for policy 0, policy_version 30688 (0.0007) -[2024-07-05 12:59:12,031][09119] Updated weights for policy 0, policy_version 30698 (0.0007) -[2024-07-05 12:59:13,744][09119] Updated weights for policy 0, policy_version 30708 (0.0009) -[2024-07-05 12:59:14,420][05794] Fps is (10 sec: 47513.5, 60 sec: 48332.9, 300 sec: 48485.5). Total num frames: 231579648. Throughput: 0: 12106.6. Samples: 7874164. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:59:14,421][05794] Avg episode reward: [(0, '53.476')] -[2024-07-05 12:59:15,420][09119] Updated weights for policy 0, policy_version 30718 (0.0011) -[2024-07-05 12:59:17,129][09119] Updated weights for policy 0, policy_version 30728 (0.0007) -[2024-07-05 12:59:18,784][09119] Updated weights for policy 0, policy_version 30738 (0.0007) -[2024-07-05 12:59:19,420][05794] Fps is (10 sec: 48331.8, 60 sec: 48332.7, 300 sec: 48485.6). Total num frames: 231825408. Throughput: 0: 12101.8. Samples: 7946688. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:59:19,422][05794] Avg episode reward: [(0, '49.059')] -[2024-07-05 12:59:20,515][09119] Updated weights for policy 0, policy_version 30748 (0.0011) -[2024-07-05 12:59:22,198][09119] Updated weights for policy 0, policy_version 30758 (0.0007) -[2024-07-05 12:59:23,856][09119] Updated weights for policy 0, policy_version 30768 (0.0010) -[2024-07-05 12:59:24,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 232062976. Throughput: 0: 12104.2. Samples: 7983216. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 12:59:24,421][05794] Avg episode reward: [(0, '51.491')] -[2024-07-05 12:59:25,559][09119] Updated weights for policy 0, policy_version 30778 (0.0008) -[2024-07-05 12:59:27,267][09119] Updated weights for policy 0, policy_version 30788 (0.0007) -[2024-07-05 12:59:28,967][09119] Updated weights for policy 0, policy_version 30798 (0.0008) -[2024-07-05 12:59:29,420][05794] Fps is (10 sec: 49153.1, 60 sec: 48469.5, 300 sec: 48513.3). Total num frames: 232316928. Throughput: 0: 12105.1. Samples: 8055728. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:59:29,421][05794] Avg episode reward: [(0, '50.893')] -[2024-07-05 12:59:30,654][09119] Updated weights for policy 0, policy_version 30808 (0.0008) -[2024-07-05 12:59:32,321][09119] Updated weights for policy 0, policy_version 30818 (0.0011) -[2024-07-05 12:59:34,026][09119] Updated weights for policy 0, policy_version 30828 (0.0010) -[2024-07-05 12:59:34,420][05794] Fps is (10 sec: 49152.0, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 232554496. Throughput: 0: 12108.7. Samples: 8128640. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:59:34,421][05794] Avg episode reward: [(0, '53.684')] -[2024-07-05 12:59:35,677][09119] Updated weights for policy 0, policy_version 30838 (0.0007) -[2024-07-05 12:59:37,391][09119] Updated weights for policy 0, policy_version 30848 (0.0008) -[2024-07-05 12:59:39,120][09119] Updated weights for policy 0, policy_version 30858 (0.0007) -[2024-07-05 12:59:39,420][05794] Fps is (10 sec: 47513.7, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 232792064. Throughput: 0: 12094.9. Samples: 8164852. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:59:39,421][05794] Avg episode reward: [(0, '54.095')] -[2024-07-05 12:59:40,810][09119] Updated weights for policy 0, policy_version 30868 (0.0007) -[2024-07-05 12:59:42,491][09119] Updated weights for policy 0, policy_version 30878 (0.0007) -[2024-07-05 12:59:44,193][09119] Updated weights for policy 0, policy_version 30888 (0.0007) -[2024-07-05 12:59:44,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 48485.5). Total num frames: 233037824. Throughput: 0: 12100.1. Samples: 8237388. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 12:59:44,421][05794] Avg episode reward: [(0, '53.324')] -[2024-07-05 12:59:45,876][09119] Updated weights for policy 0, policy_version 30898 (0.0008) -[2024-07-05 12:59:47,556][09119] Updated weights for policy 0, policy_version 30908 (0.0008) -[2024-07-05 12:59:49,218][09119] Updated weights for policy 0, policy_version 30918 (0.0007) -[2024-07-05 12:59:49,420][05794] Fps is (10 sec: 49151.7, 60 sec: 48469.5, 300 sec: 48485.5). Total num frames: 233283584. Throughput: 0: 12113.9. Samples: 8310472. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:59:49,421][05794] Avg episode reward: [(0, '51.698')] -[2024-07-05 12:59:50,892][09119] Updated weights for policy 0, policy_version 30928 (0.0010) -[2024-07-05 12:59:52,591][09119] Updated weights for policy 0, policy_version 30938 (0.0007) -[2024-07-05 12:59:54,294][09119] Updated weights for policy 0, policy_version 30948 (0.0008) -[2024-07-05 12:59:54,420][05794] Fps is (10 sec: 48331.0, 60 sec: 48332.5, 300 sec: 48485.5). Total num frames: 233521152. Throughput: 0: 12118.7. Samples: 8346700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:59:54,421][05794] Avg episode reward: [(0, '53.171')] -[2024-07-05 12:59:54,475][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000030949_233529344.pth... -[2024-07-05 12:59:54,544][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000029529_221896704.pth -[2024-07-05 12:59:55,974][09119] Updated weights for policy 0, policy_version 30958 (0.0008) -[2024-07-05 12:59:57,699][09119] Updated weights for policy 0, policy_version 30968 (0.0009) -[2024-07-05 12:59:59,420][05794] Fps is (10 sec: 47513.8, 60 sec: 48333.0, 300 sec: 48457.8). Total num frames: 233758720. Throughput: 0: 12118.5. Samples: 8419496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 12:59:59,421][05794] Avg episode reward: [(0, '51.924')] -[2024-07-05 12:59:59,425][09119] Updated weights for policy 0, policy_version 30978 (0.0010) -[2024-07-05 13:00:01,092][09119] Updated weights for policy 0, policy_version 30988 (0.0008) -[2024-07-05 13:00:02,839][09119] Updated weights for policy 0, policy_version 30998 (0.0008) -[2024-07-05 13:00:04,420][05794] Fps is (10 sec: 48334.5, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 234004480. Throughput: 0: 12101.2. Samples: 8491240. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:00:04,421][05794] Avg episode reward: [(0, '51.845')] -[2024-07-05 13:00:04,519][09119] Updated weights for policy 0, policy_version 31008 (0.0007) -[2024-07-05 13:00:06,190][09119] Updated weights for policy 0, policy_version 31018 (0.0008) -[2024-07-05 13:00:07,895][09119] Updated weights for policy 0, policy_version 31028 (0.0008) -[2024-07-05 13:00:09,420][05794] Fps is (10 sec: 48332.4, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 234242048. Throughput: 0: 12101.7. Samples: 8527792. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:00:09,421][05794] Avg episode reward: [(0, '48.834')] -[2024-07-05 13:00:09,576][09119] Updated weights for policy 0, policy_version 31038 (0.0008) -[2024-07-05 13:00:11,275][09119] Updated weights for policy 0, policy_version 31048 (0.0008) -[2024-07-05 13:00:13,056][09119] Updated weights for policy 0, policy_version 31058 (0.0007) -[2024-07-05 13:00:14,420][05794] Fps is (10 sec: 47513.6, 60 sec: 48332.8, 300 sec: 48430.0). Total num frames: 234479616. Throughput: 0: 12093.1. Samples: 8599916. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:00:14,421][05794] Avg episode reward: [(0, '52.314')] -[2024-07-05 13:00:14,809][09119] Updated weights for policy 0, policy_version 31068 (0.0008) -[2024-07-05 13:00:16,573][09119] Updated weights for policy 0, policy_version 31078 (0.0009) -[2024-07-05 13:00:18,362][09119] Updated weights for policy 0, policy_version 31088 (0.0012) -[2024-07-05 13:00:19,420][05794] Fps is (10 sec: 47513.4, 60 sec: 48196.4, 300 sec: 48402.2). Total num frames: 234717184. Throughput: 0: 12023.2. Samples: 8669684. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:00:19,421][05794] Avg episode reward: [(0, '52.600')] -[2024-07-05 13:00:20,084][09119] Updated weights for policy 0, policy_version 31098 (0.0011) -[2024-07-05 13:00:21,812][09119] Updated weights for policy 0, policy_version 31108 (0.0008) -[2024-07-05 13:00:23,563][09119] Updated weights for policy 0, policy_version 31118 (0.0007) -[2024-07-05 13:00:24,420][05794] Fps is (10 sec: 46694.1, 60 sec: 48059.7, 300 sec: 48374.4). Total num frames: 234946560. Throughput: 0: 11993.7. Samples: 8704572. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:00:24,422][05794] Avg episode reward: [(0, '51.922')] -[2024-07-05 13:00:25,305][09119] Updated weights for policy 0, policy_version 31128 (0.0008) -[2024-07-05 13:00:27,072][09119] Updated weights for policy 0, policy_version 31138 (0.0008) -[2024-07-05 13:00:28,834][09119] Updated weights for policy 0, policy_version 31148 (0.0008) -[2024-07-05 13:00:29,420][05794] Fps is (10 sec: 46694.7, 60 sec: 47786.6, 300 sec: 48346.7). Total num frames: 235184128. Throughput: 0: 11938.2. Samples: 8774608. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:00:29,421][05794] Avg episode reward: [(0, '50.610')] -[2024-07-05 13:00:30,565][09119] Updated weights for policy 0, policy_version 31158 (0.0011) -[2024-07-05 13:00:32,297][09119] Updated weights for policy 0, policy_version 31168 (0.0008) -[2024-07-05 13:00:34,007][09119] Updated weights for policy 0, policy_version 31178 (0.0008) -[2024-07-05 13:00:34,420][05794] Fps is (10 sec: 47513.7, 60 sec: 47786.6, 300 sec: 48318.9). Total num frames: 235421696. Throughput: 0: 11899.7. Samples: 8845960. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:00:34,421][05794] Avg episode reward: [(0, '51.299')] -[2024-07-05 13:00:35,706][09119] Updated weights for policy 0, policy_version 31188 (0.0008) -[2024-07-05 13:00:37,388][09119] Updated weights for policy 0, policy_version 31198 (0.0007) -[2024-07-05 13:00:39,107][09119] Updated weights for policy 0, policy_version 31208 (0.0011) -[2024-07-05 13:00:39,420][05794] Fps is (10 sec: 47513.7, 60 sec: 47786.6, 300 sec: 48318.9). Total num frames: 235659264. Throughput: 0: 11898.8. Samples: 8882144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:00:39,421][05794] Avg episode reward: [(0, '52.134')] -[2024-07-05 13:00:40,812][09119] Updated weights for policy 0, policy_version 31218 (0.0008) -[2024-07-05 13:00:42,506][09119] Updated weights for policy 0, policy_version 31228 (0.0008) -[2024-07-05 13:00:44,213][09119] Updated weights for policy 0, policy_version 31238 (0.0008) -[2024-07-05 13:00:44,420][05794] Fps is (10 sec: 48333.1, 60 sec: 47786.7, 300 sec: 48319.0). Total num frames: 235905024. Throughput: 0: 11876.6. Samples: 8953944. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:00:44,421][05794] Avg episode reward: [(0, '50.737')] -[2024-07-05 13:00:45,897][09119] Updated weights for policy 0, policy_version 31248 (0.0008) -[2024-07-05 13:00:47,628][09119] Updated weights for policy 0, policy_version 31258 (0.0010) -[2024-07-05 13:00:49,315][09119] Updated weights for policy 0, policy_version 31268 (0.0007) -[2024-07-05 13:00:49,420][05794] Fps is (10 sec: 48332.7, 60 sec: 47650.1, 300 sec: 48318.9). Total num frames: 236142592. Throughput: 0: 11877.7. Samples: 9025736. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:00:49,421][05794] Avg episode reward: [(0, '50.655')] -[2024-07-05 13:00:51,019][09119] Updated weights for policy 0, policy_version 31278 (0.0008) -[2024-07-05 13:00:52,723][09119] Updated weights for policy 0, policy_version 31288 (0.0008) -[2024-07-05 13:00:54,420][05794] Fps is (10 sec: 47513.4, 60 sec: 47650.4, 300 sec: 48291.2). Total num frames: 236380160. Throughput: 0: 11871.6. Samples: 9062016. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:00:54,421][05794] Avg episode reward: [(0, '50.844')] -[2024-07-05 13:00:54,446][09119] Updated weights for policy 0, policy_version 31298 (0.0008) -[2024-07-05 13:00:56,140][09119] Updated weights for policy 0, policy_version 31308 (0.0012) -[2024-07-05 13:00:57,852][09119] Updated weights for policy 0, policy_version 31318 (0.0009) -[2024-07-05 13:00:59,420][05794] Fps is (10 sec: 48332.8, 60 sec: 47786.6, 300 sec: 48318.9). Total num frames: 236625920. Throughput: 0: 11871.4. Samples: 9134128. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:00:59,421][05794] Avg episode reward: [(0, '53.767')] -[2024-07-05 13:00:59,554][09119] Updated weights for policy 0, policy_version 31328 (0.0007) -[2024-07-05 13:01:01,237][09119] Updated weights for policy 0, policy_version 31338 (0.0008) -[2024-07-05 13:01:02,951][09119] Updated weights for policy 0, policy_version 31348 (0.0007) -[2024-07-05 13:01:04,420][05794] Fps is (10 sec: 48332.7, 60 sec: 47650.1, 300 sec: 48291.2). Total num frames: 236863488. Throughput: 0: 11927.1. Samples: 9206404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:01:04,421][05794] Avg episode reward: [(0, '51.730')] -[2024-07-05 13:01:04,675][09119] Updated weights for policy 0, policy_version 31358 (0.0011) -[2024-07-05 13:01:06,407][09119] Updated weights for policy 0, policy_version 31368 (0.0007) -[2024-07-05 13:01:08,081][09119] Updated weights for policy 0, policy_version 31378 (0.0009) -[2024-07-05 13:01:09,420][05794] Fps is (10 sec: 48333.0, 60 sec: 47786.7, 300 sec: 48291.2). Total num frames: 237109248. Throughput: 0: 11952.5. Samples: 9242432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:01:09,421][05794] Avg episode reward: [(0, '53.348')] -[2024-07-05 13:01:09,759][09119] Updated weights for policy 0, policy_version 31388 (0.0007) -[2024-07-05 13:01:11,465][09119] Updated weights for policy 0, policy_version 31398 (0.0007) -[2024-07-05 13:01:13,158][09119] Updated weights for policy 0, policy_version 31408 (0.0008) -[2024-07-05 13:01:14,420][05794] Fps is (10 sec: 48332.8, 60 sec: 47786.6, 300 sec: 48291.1). Total num frames: 237346816. Throughput: 0: 11998.7. Samples: 9314548. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:01:14,421][05794] Avg episode reward: [(0, '52.678')] -[2024-07-05 13:01:14,901][09119] Updated weights for policy 0, policy_version 31418 (0.0009) -[2024-07-05 13:01:16,616][09119] Updated weights for policy 0, policy_version 31428 (0.0008) -[2024-07-05 13:01:18,307][09119] Updated weights for policy 0, policy_version 31438 (0.0007) -[2024-07-05 13:01:19,420][05794] Fps is (10 sec: 47511.7, 60 sec: 47786.4, 300 sec: 48263.3). Total num frames: 237584384. Throughput: 0: 12015.1. Samples: 9386644. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:01:19,422][05794] Avg episode reward: [(0, '51.361')] -[2024-07-05 13:01:20,024][09119] Updated weights for policy 0, policy_version 31448 (0.0007) -[2024-07-05 13:01:21,685][09119] Updated weights for policy 0, policy_version 31458 (0.0008) -[2024-07-05 13:01:23,375][09119] Updated weights for policy 0, policy_version 31468 (0.0010) -[2024-07-05 13:01:24,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48059.8, 300 sec: 48291.1). Total num frames: 237830144. Throughput: 0: 12016.9. Samples: 9422904. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:01:24,421][05794] Avg episode reward: [(0, '53.216')] -[2024-07-05 13:01:25,085][09119] Updated weights for policy 0, policy_version 31478 (0.0008) -[2024-07-05 13:01:26,738][09119] Updated weights for policy 0, policy_version 31488 (0.0009) -[2024-07-05 13:01:28,452][09119] Updated weights for policy 0, policy_version 31498 (0.0009) -[2024-07-05 13:01:29,420][05794] Fps is (10 sec: 48334.6, 60 sec: 48059.7, 300 sec: 48263.4). Total num frames: 238067712. Throughput: 0: 12031.5. Samples: 9495364. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:01:29,421][05794] Avg episode reward: [(0, '49.794')] -[2024-07-05 13:01:30,156][09119] Updated weights for policy 0, policy_version 31508 (0.0009) -[2024-07-05 13:01:31,850][09119] Updated weights for policy 0, policy_version 31518 (0.0008) -[2024-07-05 13:01:33,581][09119] Updated weights for policy 0, policy_version 31528 (0.0013) -[2024-07-05 13:01:34,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48291.1). Total num frames: 238313472. Throughput: 0: 12039.8. Samples: 9567528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:01:34,421][05794] Avg episode reward: [(0, '50.555')] -[2024-07-05 13:01:35,280][09119] Updated weights for policy 0, policy_version 31538 (0.0010) -[2024-07-05 13:01:37,008][09119] Updated weights for policy 0, policy_version 31548 (0.0007) -[2024-07-05 13:01:38,707][09119] Updated weights for policy 0, policy_version 31558 (0.0008) -[2024-07-05 13:01:39,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 238551040. Throughput: 0: 12032.5. Samples: 9603480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:01:39,422][05794] Avg episode reward: [(0, '53.292')] -[2024-07-05 13:01:40,400][09119] Updated weights for policy 0, policy_version 31568 (0.0007) -[2024-07-05 13:01:42,092][09119] Updated weights for policy 0, policy_version 31578 (0.0008) -[2024-07-05 13:01:43,808][09119] Updated weights for policy 0, policy_version 31588 (0.0008) -[2024-07-05 13:01:44,420][05794] Fps is (10 sec: 47513.8, 60 sec: 48059.7, 300 sec: 48235.6). Total num frames: 238788608. Throughput: 0: 12032.0. Samples: 9675568. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:01:44,421][05794] Avg episode reward: [(0, '49.272')] -[2024-07-05 13:01:45,502][09119] Updated weights for policy 0, policy_version 31598 (0.0008) -[2024-07-05 13:01:47,243][09119] Updated weights for policy 0, policy_version 31608 (0.0008) -[2024-07-05 13:01:48,927][09119] Updated weights for policy 0, policy_version 31618 (0.0009) -[2024-07-05 13:01:49,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 239034368. Throughput: 0: 12033.3. Samples: 9747900. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:01:49,421][05794] Avg episode reward: [(0, '51.011')] -[2024-07-05 13:01:50,633][09119] Updated weights for policy 0, policy_version 31628 (0.0010) -[2024-07-05 13:01:52,323][09119] Updated weights for policy 0, policy_version 31638 (0.0008) -[2024-07-05 13:01:54,011][09119] Updated weights for policy 0, policy_version 31648 (0.0010) -[2024-07-05 13:01:54,420][05794] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 239271936. Throughput: 0: 12030.3. Samples: 9783796. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:01:54,421][05794] Avg episode reward: [(0, '51.402')] -[2024-07-05 13:01:54,425][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000031650_239271936.pth... -[2024-07-05 13:01:54,500][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000030239_227713024.pth -[2024-07-05 13:01:55,750][09119] Updated weights for policy 0, policy_version 31658 (0.0007) -[2024-07-05 13:01:57,428][09119] Updated weights for policy 0, policy_version 31668 (0.0009) -[2024-07-05 13:01:59,129][09119] Updated weights for policy 0, policy_version 31678 (0.0008) -[2024-07-05 13:01:59,420][05794] Fps is (10 sec: 47513.9, 60 sec: 48059.8, 300 sec: 48207.9). Total num frames: 239509504. Throughput: 0: 12027.1. Samples: 9855768. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:01:59,421][05794] Avg episode reward: [(0, '51.041')] -[2024-07-05 13:02:00,826][09119] Updated weights for policy 0, policy_version 31688 (0.0007) -[2024-07-05 13:02:02,566][09119] Updated weights for policy 0, policy_version 31698 (0.0008) -[2024-07-05 13:02:04,292][09119] Updated weights for policy 0, policy_version 31708 (0.0011) -[2024-07-05 13:02:04,420][05794] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48207.8). Total num frames: 239747072. Throughput: 0: 12030.6. Samples: 9928016. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:02:04,421][05794] Avg episode reward: [(0, '51.545')] -[2024-07-05 13:02:06,014][09119] Updated weights for policy 0, policy_version 31718 (0.0011) -[2024-07-05 13:02:07,751][09119] Updated weights for policy 0, policy_version 31728 (0.0007) -[2024-07-05 13:02:09,420][05794] Fps is (10 sec: 47513.3, 60 sec: 47923.2, 300 sec: 48180.1). Total num frames: 239984640. Throughput: 0: 12027.1. Samples: 9964124. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:02:09,421][05794] Avg episode reward: [(0, '50.340')] -[2024-07-05 13:02:09,437][09119] Updated weights for policy 0, policy_version 31738 (0.0008) -[2024-07-05 13:02:11,135][09119] Updated weights for policy 0, policy_version 31748 (0.0009) -[2024-07-05 13:02:12,840][09119] Updated weights for policy 0, policy_version 31758 (0.0008) -[2024-07-05 13:02:14,420][05794] Fps is (10 sec: 48333.1, 60 sec: 48059.8, 300 sec: 48207.8). Total num frames: 240230400. Throughput: 0: 12002.8. Samples: 10035492. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:02:14,421][05794] Avg episode reward: [(0, '52.245')] -[2024-07-05 13:02:14,565][09119] Updated weights for policy 0, policy_version 31768 (0.0007) -[2024-07-05 13:02:16,230][09119] Updated weights for policy 0, policy_version 31778 (0.0007) -[2024-07-05 13:02:17,928][09119] Updated weights for policy 0, policy_version 31788 (0.0007) -[2024-07-05 13:02:19,420][05794] Fps is (10 sec: 48333.0, 60 sec: 48060.1, 300 sec: 48180.1). Total num frames: 240467968. Throughput: 0: 12007.5. Samples: 10107864. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:02:19,421][05794] Avg episode reward: [(0, '52.721')] -[2024-07-05 13:02:19,624][09119] Updated weights for policy 0, policy_version 31798 (0.0007) -[2024-07-05 13:02:21,337][09119] Updated weights for policy 0, policy_version 31808 (0.0008) -[2024-07-05 13:02:23,052][09119] Updated weights for policy 0, policy_version 31818 (0.0008) -[2024-07-05 13:02:24,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 48207.8). Total num frames: 240713728. Throughput: 0: 12007.6. Samples: 10143824. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:02:24,421][05794] Avg episode reward: [(0, '50.542')] -[2024-07-05 13:02:24,762][09119] Updated weights for policy 0, policy_version 31828 (0.0010) -[2024-07-05 13:02:26,445][09119] Updated weights for policy 0, policy_version 31838 (0.0009) -[2024-07-05 13:02:28,159][09119] Updated weights for policy 0, policy_version 31848 (0.0008) -[2024-07-05 13:02:29,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48059.9, 300 sec: 48180.1). Total num frames: 240951296. Throughput: 0: 12013.3. Samples: 10216164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:02:29,421][05794] Avg episode reward: [(0, '49.432')] -[2024-07-05 13:02:29,891][09119] Updated weights for policy 0, policy_version 31858 (0.0010) -[2024-07-05 13:02:31,563][09119] Updated weights for policy 0, policy_version 31868 (0.0008) -[2024-07-05 13:02:33,301][09119] Updated weights for policy 0, policy_version 31878 (0.0008) -[2024-07-05 13:02:34,420][05794] Fps is (10 sec: 47512.7, 60 sec: 47923.0, 300 sec: 48152.3). Total num frames: 241188864. Throughput: 0: 12003.3. Samples: 10288052. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:02:34,421][05794] Avg episode reward: [(0, '49.866')] -[2024-07-05 13:02:34,973][09119] Updated weights for policy 0, policy_version 31888 (0.0008) -[2024-07-05 13:02:36,676][09119] Updated weights for policy 0, policy_version 31898 (0.0008) -[2024-07-05 13:02:38,397][09119] Updated weights for policy 0, policy_version 31908 (0.0010) -[2024-07-05 13:02:39,420][05794] Fps is (10 sec: 48332.1, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 241434624. Throughput: 0: 12010.6. Samples: 10324272. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:02:39,421][05794] Avg episode reward: [(0, '53.231')] -[2024-07-05 13:02:40,059][09119] Updated weights for policy 0, policy_version 31918 (0.0007) -[2024-07-05 13:02:41,782][09119] Updated weights for policy 0, policy_version 31928 (0.0008) -[2024-07-05 13:02:43,492][09119] Updated weights for policy 0, policy_version 31938 (0.0012) -[2024-07-05 13:02:44,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48059.5, 300 sec: 48152.3). Total num frames: 241672192. Throughput: 0: 12016.1. Samples: 10396496. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:02:44,421][05794] Avg episode reward: [(0, '50.635')] -[2024-07-05 13:02:45,216][09119] Updated weights for policy 0, policy_version 31948 (0.0010) -[2024-07-05 13:02:46,924][09119] Updated weights for policy 0, policy_version 31958 (0.0007) -[2024-07-05 13:02:48,605][09119] Updated weights for policy 0, policy_version 31968 (0.0007) -[2024-07-05 13:02:49,420][05794] Fps is (10 sec: 47512.6, 60 sec: 47923.0, 300 sec: 48152.3). Total num frames: 241909760. Throughput: 0: 12002.4. Samples: 10468124. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:02:49,421][05794] Avg episode reward: [(0, '50.624')] -[2024-07-05 13:02:50,267][09119] Updated weights for policy 0, policy_version 31978 (0.0011) -[2024-07-05 13:02:51,971][09119] Updated weights for policy 0, policy_version 31988 (0.0008) -[2024-07-05 13:02:53,690][09119] Updated weights for policy 0, policy_version 31998 (0.0010) -[2024-07-05 13:02:54,420][05794] Fps is (10 sec: 48333.7, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 242155520. Throughput: 0: 11997.7. Samples: 10504020. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:02:54,421][05794] Avg episode reward: [(0, '47.775')] -[2024-07-05 13:02:55,400][09119] Updated weights for policy 0, policy_version 32008 (0.0011) -[2024-07-05 13:02:57,116][09119] Updated weights for policy 0, policy_version 32018 (0.0008) -[2024-07-05 13:02:58,847][09119] Updated weights for policy 0, policy_version 32028 (0.0008) -[2024-07-05 13:02:59,420][05794] Fps is (10 sec: 48334.0, 60 sec: 48059.7, 300 sec: 48124.5). Total num frames: 242393088. Throughput: 0: 12015.6. Samples: 10576192. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:02:59,421][05794] Avg episode reward: [(0, '49.749')] -[2024-07-05 13:03:00,580][09119] Updated weights for policy 0, policy_version 32038 (0.0010) -[2024-07-05 13:03:02,263][09119] Updated weights for policy 0, policy_version 32048 (0.0007) -[2024-07-05 13:03:03,990][09119] Updated weights for policy 0, policy_version 32058 (0.0011) -[2024-07-05 13:03:04,420][05794] Fps is (10 sec: 47513.4, 60 sec: 48059.7, 300 sec: 48096.8). Total num frames: 242630656. Throughput: 0: 12009.8. Samples: 10648308. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:03:04,422][05794] Avg episode reward: [(0, '50.112')] -[2024-07-05 13:03:05,694][09119] Updated weights for policy 0, policy_version 32068 (0.0008) -[2024-07-05 13:03:07,412][09119] Updated weights for policy 0, policy_version 32078 (0.0010) -[2024-07-05 13:03:09,099][09119] Updated weights for policy 0, policy_version 32088 (0.0007) -[2024-07-05 13:03:09,420][05794] Fps is (10 sec: 47513.1, 60 sec: 48059.7, 300 sec: 48096.8). Total num frames: 242868224. Throughput: 0: 12017.1. Samples: 10684592. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:03:09,421][05794] Avg episode reward: [(0, '52.438')] -[2024-07-05 13:03:10,787][09119] Updated weights for policy 0, policy_version 32098 (0.0007) -[2024-07-05 13:03:12,487][09119] Updated weights for policy 0, policy_version 32108 (0.0008) -[2024-07-05 13:03:14,239][09119] Updated weights for policy 0, policy_version 32118 (0.0007) -[2024-07-05 13:03:14,420][05794] Fps is (10 sec: 48332.7, 60 sec: 48059.7, 300 sec: 48096.7). Total num frames: 243113984. Throughput: 0: 11998.7. Samples: 10756108. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:03:14,421][05794] Avg episode reward: [(0, '50.993')] -[2024-07-05 13:03:15,938][09119] Updated weights for policy 0, policy_version 32128 (0.0007) -[2024-07-05 13:03:17,659][09119] Updated weights for policy 0, policy_version 32138 (0.0008) -[2024-07-05 13:03:19,388][09119] Updated weights for policy 0, policy_version 32148 (0.0007) -[2024-07-05 13:03:19,420][05794] Fps is (10 sec: 48333.2, 60 sec: 48059.7, 300 sec: 48096.8). Total num frames: 243351552. Throughput: 0: 12009.6. Samples: 10828480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:03:19,421][05794] Avg episode reward: [(0, '52.062')] -[2024-07-05 13:03:21,031][09119] Updated weights for policy 0, policy_version 32158 (0.0011) -[2024-07-05 13:03:22,704][09119] Updated weights for policy 0, policy_version 32168 (0.0007) -[2024-07-05 13:03:24,420][05794] Fps is (10 sec: 47514.2, 60 sec: 47923.3, 300 sec: 48069.0). Total num frames: 243589120. Throughput: 0: 12004.1. Samples: 10864456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:03:24,421][05794] Avg episode reward: [(0, '51.472')] -[2024-07-05 13:03:24,462][09119] Updated weights for policy 0, policy_version 32178 (0.0009) -[2024-07-05 13:03:26,169][09119] Updated weights for policy 0, policy_version 32188 (0.0007) -[2024-07-05 13:03:27,862][09119] Updated weights for policy 0, policy_version 32198 (0.0007) -[2024-07-05 13:03:29,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48059.6, 300 sec: 48096.8). Total num frames: 243834880. Throughput: 0: 11999.0. Samples: 10936448. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:03:29,421][05794] Avg episode reward: [(0, '52.438')] -[2024-07-05 13:03:29,578][09119] Updated weights for policy 0, policy_version 32208 (0.0008) -[2024-07-05 13:03:31,272][09119] Updated weights for policy 0, policy_version 32218 (0.0008) -[2024-07-05 13:03:32,998][09119] Updated weights for policy 0, policy_version 32228 (0.0010) -[2024-07-05 13:03:34,420][05794] Fps is (10 sec: 48332.4, 60 sec: 48059.9, 300 sec: 48069.0). Total num frames: 244072448. Throughput: 0: 12013.2. Samples: 11008716. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:03:34,421][05794] Avg episode reward: [(0, '53.190')] -[2024-07-05 13:03:34,716][09119] Updated weights for policy 0, policy_version 32238 (0.0007) -[2024-07-05 13:03:36,369][09119] Updated weights for policy 0, policy_version 32248 (0.0008) -[2024-07-05 13:03:38,097][09119] Updated weights for policy 0, policy_version 32258 (0.0007) -[2024-07-05 13:03:39,420][05794] Fps is (10 sec: 47513.8, 60 sec: 47923.2, 300 sec: 48069.0). Total num frames: 244310016. Throughput: 0: 12013.1. Samples: 11044608. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:03:39,421][05794] Avg episode reward: [(0, '50.989')] -[2024-07-05 13:03:39,769][09119] Updated weights for policy 0, policy_version 32268 (0.0008) -[2024-07-05 13:03:41,474][09119] Updated weights for policy 0, policy_version 32278 (0.0008) -[2024-07-05 13:03:43,153][09119] Updated weights for policy 0, policy_version 32288 (0.0007) -[2024-07-05 13:03:44,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48059.9, 300 sec: 48069.0). Total num frames: 244555776. Throughput: 0: 12016.7. Samples: 11116944. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:03:44,421][05794] Avg episode reward: [(0, '51.290')] -[2024-07-05 13:03:44,871][09119] Updated weights for policy 0, policy_version 32298 (0.0008) -[2024-07-05 13:03:46,567][09119] Updated weights for policy 0, policy_version 32308 (0.0008) -[2024-07-05 13:03:48,296][09119] Updated weights for policy 0, policy_version 32318 (0.0008) -[2024-07-05 13:03:49,420][05794] Fps is (10 sec: 48332.8, 60 sec: 48059.9, 300 sec: 48041.2). Total num frames: 244793344. Throughput: 0: 12017.4. Samples: 11189092. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:03:49,421][05794] Avg episode reward: [(0, '48.923')] -[2024-07-05 13:03:50,006][09119] Updated weights for policy 0, policy_version 32328 (0.0011) -[2024-07-05 13:03:51,694][09119] Updated weights for policy 0, policy_version 32338 (0.0008) -[2024-07-05 13:03:53,403][09119] Updated weights for policy 0, policy_version 32348 (0.0007) -[2024-07-05 13:03:54,420][05794] Fps is (10 sec: 48332.9, 60 sec: 48059.7, 300 sec: 48069.0). Total num frames: 245039104. Throughput: 0: 12013.5. Samples: 11225200. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:03:54,421][05794] Avg episode reward: [(0, '52.319')] -[2024-07-05 13:03:54,425][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032354_245039104.pth... -[2024-07-05 13:03:54,492][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000030949_233529344.pth -[2024-07-05 13:03:55,109][09119] Updated weights for policy 0, policy_version 32358 (0.0009) -[2024-07-05 13:03:56,840][09119] Updated weights for policy 0, policy_version 32368 (0.0008) -[2024-07-05 13:03:58,530][09119] Updated weights for policy 0, policy_version 32378 (0.0007) -[2024-07-05 13:03:59,420][05794] Fps is (10 sec: 48332.3, 60 sec: 48059.7, 300 sec: 48041.2). Total num frames: 245276672. Throughput: 0: 12017.5. Samples: 11296896. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:03:59,421][05794] Avg episode reward: [(0, '50.449')] -[2024-07-05 13:04:00,223][09119] Updated weights for policy 0, policy_version 32388 (0.0009) -[2024-07-05 13:04:01,948][09119] Updated weights for policy 0, policy_version 32398 (0.0007) -[2024-07-05 13:04:03,661][09119] Updated weights for policy 0, policy_version 32408 (0.0008) -[2024-07-05 13:04:04,420][05794] Fps is (10 sec: 47513.6, 60 sec: 48059.8, 300 sec: 48041.2). Total num frames: 245514240. Throughput: 0: 12010.8. Samples: 11368968. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:04,421][05794] Avg episode reward: [(0, '50.294')] -[2024-07-05 13:04:05,313][09119] Updated weights for policy 0, policy_version 32418 (0.0007) -[2024-07-05 13:04:07,040][09119] Updated weights for policy 0, policy_version 32428 (0.0007) -[2024-07-05 13:04:08,758][09119] Updated weights for policy 0, policy_version 32438 (0.0008) -[2024-07-05 13:04:09,420][05794] Fps is (10 sec: 47513.1, 60 sec: 48059.7, 300 sec: 48041.2). Total num frames: 245751808. Throughput: 0: 12016.2. Samples: 11405188. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:09,421][05794] Avg episode reward: [(0, '51.203')] -[2024-07-05 13:04:10,439][09119] Updated weights for policy 0, policy_version 32448 (0.0008) -[2024-07-05 13:04:12,198][09119] Updated weights for policy 0, policy_version 32458 (0.0009) -[2024-07-05 13:04:13,956][09119] Updated weights for policy 0, policy_version 32468 (0.0008) -[2024-07-05 13:04:14,420][05794] Fps is (10 sec: 47513.6, 60 sec: 47923.2, 300 sec: 48013.5). Total num frames: 245989376. Throughput: 0: 12006.8. Samples: 11476756. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:14,421][05794] Avg episode reward: [(0, '50.877')] -[2024-07-05 13:04:15,670][09119] Updated weights for policy 0, policy_version 32478 (0.0008) -[2024-07-05 13:04:17,422][09119] Updated weights for policy 0, policy_version 32488 (0.0007) -[2024-07-05 13:04:19,108][09119] Updated weights for policy 0, policy_version 32498 (0.0007) -[2024-07-05 13:04:19,420][05794] Fps is (10 sec: 47514.2, 60 sec: 47923.2, 300 sec: 48013.4). Total num frames: 246226944. Throughput: 0: 11973.3. Samples: 11547516. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:19,421][05794] Avg episode reward: [(0, '51.755')] -[2024-07-05 13:04:20,822][09119] Updated weights for policy 0, policy_version 32508 (0.0009) -[2024-07-05 13:04:22,551][09119] Updated weights for policy 0, policy_version 32518 (0.0008) -[2024-07-05 13:04:24,270][09119] Updated weights for policy 0, policy_version 32528 (0.0007) -[2024-07-05 13:04:24,420][05794] Fps is (10 sec: 47513.6, 60 sec: 47923.1, 300 sec: 47957.9). Total num frames: 246464512. Throughput: 0: 11975.5. Samples: 11583508. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:24,421][05794] Avg episode reward: [(0, '51.438')] -[2024-07-05 13:04:25,985][09119] Updated weights for policy 0, policy_version 32538 (0.0009) -[2024-07-05 13:04:27,712][09119] Updated weights for policy 0, policy_version 32548 (0.0008) -[2024-07-05 13:04:29,420][05794] Fps is (10 sec: 47513.5, 60 sec: 47786.6, 300 sec: 47957.9). Total num frames: 246702080. Throughput: 0: 11970.3. Samples: 11655608. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:29,421][05794] Avg episode reward: [(0, '54.387')] -[2024-07-05 13:04:29,428][09119] Updated weights for policy 0, policy_version 32558 (0.0008) -[2024-07-05 13:04:31,132][09119] Updated weights for policy 0, policy_version 32568 (0.0008) -[2024-07-05 13:04:32,822][09119] Updated weights for policy 0, policy_version 32578 (0.0007) -[2024-07-05 13:04:34,420][05794] Fps is (10 sec: 48332.7, 60 sec: 47923.2, 300 sec: 47985.7). Total num frames: 246947840. Throughput: 0: 11960.7. Samples: 11727324. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:34,421][05794] Avg episode reward: [(0, '50.740')] -[2024-07-05 13:04:34,497][09119] Updated weights for policy 0, policy_version 32588 (0.0008) -[2024-07-05 13:04:36,143][09119] Updated weights for policy 0, policy_version 32598 (0.0008) -[2024-07-05 13:04:37,865][09119] Updated weights for policy 0, policy_version 32608 (0.0009) -[2024-07-05 13:04:39,420][05794] Fps is (10 sec: 48332.7, 60 sec: 47923.1, 300 sec: 47957.9). Total num frames: 247185408. Throughput: 0: 11971.4. Samples: 11763912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:04:39,421][05794] Avg episode reward: [(0, '52.729')] -[2024-07-05 13:04:39,551][09119] Updated weights for policy 0, policy_version 32618 (0.0010) -[2024-07-05 13:04:41,243][09119] Updated weights for policy 0, policy_version 32628 (0.0008) -[2024-07-05 13:04:43,012][09119] Updated weights for policy 0, policy_version 32638 (0.0011) -[2024-07-05 13:04:44,420][05794] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 47957.9). Total num frames: 247431168. Throughput: 0: 11976.8. Samples: 11835852. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:04:44,421][05794] Avg episode reward: [(0, '49.911')] -[2024-07-05 13:04:44,665][09119] Updated weights for policy 0, policy_version 32648 (0.0007) -[2024-07-05 13:04:46,355][09119] Updated weights for policy 0, policy_version 32658 (0.0007) -[2024-07-05 13:04:48,082][09119] Updated weights for policy 0, policy_version 32668 (0.0010) -[2024-07-05 13:04:49,420][05794] Fps is (10 sec: 48332.9, 60 sec: 47923.1, 300 sec: 47958.0). Total num frames: 247668736. Throughput: 0: 11980.1. Samples: 11908072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:04:49,421][05794] Avg episode reward: [(0, '50.651')] -[2024-07-05 13:04:49,774][09119] Updated weights for policy 0, policy_version 32678 (0.0007) -[2024-07-05 13:04:51,446][09119] Updated weights for policy 0, policy_version 32688 (0.0008) -[2024-07-05 13:04:53,220][09119] Updated weights for policy 0, policy_version 32698 (0.0008) -[2024-07-05 13:04:54,420][05794] Fps is (10 sec: 48333.1, 60 sec: 47923.2, 300 sec: 47985.7). Total num frames: 247914496. Throughput: 0: 11983.3. Samples: 11944432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:04:54,421][05794] Avg episode reward: [(0, '52.763')] -[2024-07-05 13:04:54,923][09119] Updated weights for policy 0, policy_version 32708 (0.0007) -[2024-07-05 13:04:56,567][09119] Updated weights for policy 0, policy_version 32718 (0.0007) -[2024-07-05 13:04:58,300][09119] Updated weights for policy 0, policy_version 32728 (0.0007) -[2024-07-05 13:04:59,420][05794] Fps is (10 sec: 48333.4, 60 sec: 47923.3, 300 sec: 47957.9). Total num frames: 248152064. Throughput: 0: 11999.7. Samples: 12016740. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:04:59,421][05794] Avg episode reward: [(0, '51.832')] -[2024-07-05 13:04:59,984][09119] Updated weights for policy 0, policy_version 32738 (0.0008) -[2024-07-05 13:05:01,650][09119] Updated weights for policy 0, policy_version 32748 (0.0007) -[2024-07-05 13:05:03,371][09119] Updated weights for policy 0, policy_version 32758 (0.0008) -[2024-07-05 13:05:04,420][05794] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 47985.7). Total num frames: 248397824. Throughput: 0: 12028.1. Samples: 12088780. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:05:04,421][05794] Avg episode reward: [(0, '55.043')] -[2024-07-05 13:05:05,132][09119] Updated weights for policy 0, policy_version 32768 (0.0008) -[2024-07-05 13:05:06,839][09119] Updated weights for policy 0, policy_version 32778 (0.0010) -[2024-07-05 13:05:08,619][09119] Updated weights for policy 0, policy_version 32788 (0.0009) -[2024-07-05 13:05:09,420][05794] Fps is (10 sec: 47512.5, 60 sec: 47923.2, 300 sec: 47957.9). Total num frames: 248627200. Throughput: 0: 12021.8. Samples: 12124492. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:05:09,422][05794] Avg episode reward: [(0, '52.561')] -[2024-07-05 13:05:10,355][09119] Updated weights for policy 0, policy_version 32798 (0.0007) -[2024-07-05 13:05:12,203][09119] Updated weights for policy 0, policy_version 32808 (0.0008) -[2024-07-05 13:05:13,939][09119] Updated weights for policy 0, policy_version 32818 (0.0008) -[2024-07-05 13:05:14,420][05794] Fps is (10 sec: 45875.5, 60 sec: 47786.7, 300 sec: 47930.2). Total num frames: 248856576. Throughput: 0: 11954.0. Samples: 12193536. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:05:14,421][05794] Avg episode reward: [(0, '53.889')] -[2024-07-05 13:05:15,701][09119] Updated weights for policy 0, policy_version 32828 (0.0008) -[2024-07-05 13:05:17,432][09119] Updated weights for policy 0, policy_version 32838 (0.0008) -[2024-07-05 13:05:19,172][09119] Updated weights for policy 0, policy_version 32848 (0.0008) -[2024-07-05 13:05:19,420][05794] Fps is (10 sec: 46695.2, 60 sec: 47786.7, 300 sec: 47957.9). Total num frames: 249094144. Throughput: 0: 11922.5. Samples: 12263836. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:05:19,421][05794] Avg episode reward: [(0, '51.144')] -[2024-07-05 13:05:20,897][09119] Updated weights for policy 0, policy_version 32858 (0.0010) -[2024-07-05 13:05:22,619][09119] Updated weights for policy 0, policy_version 32868 (0.0007) -[2024-07-05 13:05:24,301][09119] Updated weights for policy 0, policy_version 32878 (0.0007) -[2024-07-05 13:05:24,420][05794] Fps is (10 sec: 47512.2, 60 sec: 47786.5, 300 sec: 47957.9). Total num frames: 249331712. Throughput: 0: 11893.3. Samples: 12299112. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:05:24,422][05794] Avg episode reward: [(0, '51.044')] -[2024-07-05 13:05:26,003][09119] Updated weights for policy 0, policy_version 32888 (0.0008) -[2024-07-05 13:05:27,702][09119] Updated weights for policy 0, policy_version 32898 (0.0007) -[2024-07-05 13:05:29,382][09119] Updated weights for policy 0, policy_version 32908 (0.0009) -[2024-07-05 13:05:29,420][05794] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 47985.7). Total num frames: 249577472. Throughput: 0: 11915.5. Samples: 12372048. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:05:29,421][05794] Avg episode reward: [(0, '51.210')] -[2024-07-05 13:05:31,071][09119] Updated weights for policy 0, policy_version 32918 (0.0009) -[2024-07-05 13:05:32,748][09119] Updated weights for policy 0, policy_version 32928 (0.0007) -[2024-07-05 13:05:34,420][05794] Fps is (10 sec: 48333.8, 60 sec: 47786.7, 300 sec: 47985.7). Total num frames: 249815040. Throughput: 0: 11928.2. Samples: 12444840. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:05:34,421][05794] Avg episode reward: [(0, '51.905')] -[2024-07-05 13:05:34,426][09119] Updated weights for policy 0, policy_version 32938 (0.0010) -[2024-07-05 13:05:36,143][09119] Updated weights for policy 0, policy_version 32948 (0.0008) -[2024-07-05 13:05:37,841][09119] Updated weights for policy 0, policy_version 32958 (0.0007) -[2024-07-05 13:05:38,361][09099] Stopping Batcher_0... -[2024-07-05 13:05:38,361][09099] Loop batcher_evt_loop terminating... -[2024-07-05 13:05:38,361][05794] Component Batcher_0 stopped! -[2024-07-05 13:05:38,362][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032961_250011648.pth... -[2024-07-05 13:05:38,389][09123] Stopping RolloutWorker_w3... -[2024-07-05 13:05:38,390][09121] Stopping RolloutWorker_w1... -[2024-07-05 13:05:38,390][09123] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 13:05:38,390][09121] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 13:05:38,391][09147] Stopping RolloutWorker_w13... -[2024-07-05 13:05:38,390][05794] Component RolloutWorker_w3 stopped! -[2024-07-05 13:05:38,391][09147] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 13:05:38,391][09148] Stopping RolloutWorker_w12... -[2024-07-05 13:05:38,391][09122] Stopping RolloutWorker_w2... -[2024-07-05 13:05:38,392][09148] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 13:05:38,392][09122] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 13:05:38,391][05794] Component RolloutWorker_w1 stopped! -[2024-07-05 13:05:38,392][09126] Stopping RolloutWorker_w5... -[2024-07-05 13:05:38,392][09131] Stopping RolloutWorker_w11... -[2024-07-05 13:05:38,392][09151] Stopping RolloutWorker_w15... -[2024-07-05 13:05:38,392][09126] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 13:05:38,392][09129] Stopping RolloutWorker_w9... -[2024-07-05 13:05:38,393][09151] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 13:05:38,393][09131] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 13:05:38,393][09129] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 13:05:38,393][09127] Stopping RolloutWorker_w8... -[2024-07-05 13:05:38,393][09127] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 13:05:38,392][05794] Component RolloutWorker_w13 stopped! -[2024-07-05 13:05:38,394][09130] Stopping RolloutWorker_w10... -[2024-07-05 13:05:38,394][09125] Stopping RolloutWorker_w6... -[2024-07-05 13:05:38,394][09125] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 13:05:38,394][09130] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 13:05:38,395][09128] Stopping RolloutWorker_w7... -[2024-07-05 13:05:38,395][09128] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 13:05:38,396][09149] Stopping RolloutWorker_w14... -[2024-07-05 13:05:38,396][09149] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 13:05:38,393][05794] Component RolloutWorker_w12 stopped! -[2024-07-05 13:05:38,399][05794] Component RolloutWorker_w2 stopped! -[2024-07-05 13:05:38,399][05794] Component RolloutWorker_w5 stopped! -[2024-07-05 13:05:38,400][05794] Component RolloutWorker_w11 stopped! -[2024-07-05 13:05:38,401][05794] Component RolloutWorker_w15 stopped! -[2024-07-05 13:05:38,402][05794] Component RolloutWorker_w9 stopped! -[2024-07-05 13:05:38,403][05794] Component RolloutWorker_w8 stopped! -[2024-07-05 13:05:38,404][05794] Component RolloutWorker_w6 stopped! -[2024-07-05 13:05:38,404][05794] Component RolloutWorker_w10 stopped! -[2024-07-05 13:05:38,405][05794] Component RolloutWorker_w7 stopped! -[2024-07-05 13:05:38,406][05794] Component RolloutWorker_w14 stopped! -[2024-07-05 13:05:38,415][09124] Stopping RolloutWorker_w4... -[2024-07-05 13:05:38,416][09124] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 13:05:38,415][05794] Component RolloutWorker_w4 stopped! -[2024-07-05 13:05:38,433][09119] Weights refcount: 2 0 -[2024-07-05 13:05:38,435][09119] Stopping InferenceWorker_p0-w0... -[2024-07-05 13:05:38,436][09119] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 13:05:38,436][05794] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 13:05:38,444][09120] Stopping RolloutWorker_w0... -[2024-07-05 13:05:38,444][09120] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 13:05:38,444][05794] Component RolloutWorker_w0 stopped! -[2024-07-05 13:05:38,463][09099] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000031650_239271936.pth -[2024-07-05 13:05:38,475][09099] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032961_250011648.pth... -[2024-07-05 13:05:38,583][09099] Stopping LearnerWorker_p0... -[2024-07-05 13:05:38,583][09099] Loop learner_proc0_evt_loop terminating... -[2024-07-05 13:05:38,583][05794] Component LearnerWorker_p0 stopped! -[2024-07-05 13:05:38,584][05794] Waiting for process learner_proc0 to stop... -[2024-07-05 13:05:39,843][05794] Waiting for process inference_proc0-0 to join... -[2024-07-05 13:05:39,844][05794] Waiting for process rollout_proc0 to join... -[2024-07-05 13:05:39,845][05794] Waiting for process rollout_proc1 to join... -[2024-07-05 13:05:39,845][05794] Waiting for process rollout_proc2 to join... -[2024-07-05 13:05:39,845][05794] Waiting for process rollout_proc3 to join... -[2024-07-05 13:05:39,846][05794] Waiting for process rollout_proc4 to join... -[2024-07-05 13:05:39,846][05794] Waiting for process rollout_proc5 to join... -[2024-07-05 13:05:39,847][05794] Waiting for process rollout_proc6 to join... -[2024-07-05 13:05:39,847][05794] Waiting for process rollout_proc7 to join... -[2024-07-05 13:05:39,847][05794] Waiting for process rollout_proc8 to join... -[2024-07-05 13:05:39,848][05794] Waiting for process rollout_proc9 to join... -[2024-07-05 13:05:39,848][05794] Waiting for process rollout_proc10 to join... -[2024-07-05 13:05:39,849][05794] Waiting for process rollout_proc11 to join... -[2024-07-05 13:05:39,849][05794] Waiting for process rollout_proc12 to join... -[2024-07-05 13:05:39,849][05794] Waiting for process rollout_proc13 to join... -[2024-07-05 13:05:39,850][05794] Waiting for process rollout_proc14 to join... -[2024-07-05 13:05:39,850][05794] Waiting for process rollout_proc15 to join... -[2024-07-05 13:05:39,851][05794] Batcher 0 profile tree view: -batching: 76.4995, releasing_batches: 0.1331 -[2024-07-05 13:05:39,851][05794] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0000 - wait_policy_total: 54.1662 -update_model: 15.8399 - weight_update: 0.0007 -one_step: 0.0027 - handle_policy_step: 950.6274 - deserialize: 72.8387, stack: 5.3281, obs_to_device_normalize: 225.4008, forward: 440.7349, send_messages: 47.8202 - prepare_outputs: 126.1618 - to_cpu: 78.2996 -[2024-07-05 13:05:39,852][05794] Learner 0 profile tree view: -misc: 0.0245, prepare_batch: 100.8392 -train: 233.6917 - epoch_init: 0.0197, minibatch_init: 0.0275, losses_postprocess: 1.0699, kl_divergence: 1.2589, after_optimizer: 1.1018 - calculate_losses: 90.3553 - losses_init: 0.0109, forward_head: 3.6206, bptt_initial: 73.2381, tail: 2.9218, advantages_returns: 0.8126, losses: 4.2579 - bptt: 4.5560 - bptt_forward_core: 4.3333 - update: 137.9544 - clip: 3.7878 -[2024-07-05 13:05:39,852][05794] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.4786, enqueue_policy_requests: 30.7842, env_step: 500.2267, overhead: 50.0167, complete_rollouts: 1.0497 -save_policy_outputs: 38.1660 - split_output_tensors: 17.6141 -[2024-07-05 13:05:39,853][05794] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.4906, enqueue_policy_requests: 32.2533, env_step: 513.0497, overhead: 53.9027, complete_rollouts: 1.1660 -save_policy_outputs: 38.1503 - split_output_tensors: 17.6823 -[2024-07-05 13:05:39,853][05794] Loop Runner_EvtLoop terminating... -[2024-07-05 13:05:39,854][05794] Runner profile tree view: -main_loop: 1061.8633 -[2024-07-05 13:05:39,854][05794] Collected {0: 250011648}, FPS: 47083.1 -[2024-07-05 13:05:56,088][05794] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 13:05:56,088][05794] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 13:05:56,089][05794] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 13:05:56,089][05794] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 13:05:56,090][05794] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 13:05:56,090][05794] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 13:05:56,090][05794] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 13:05:56,090][05794] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 13:05:56,091][05794] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 13:05:56,091][05794] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 13:05:56,091][05794] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 13:05:56,091][05794] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 13:05:56,092][05794] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 13:05:56,092][05794] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 13:05:56,092][05794] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 13:05:56,104][05794] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:05:56,105][05794] RunningMeanStd input shape: (1,) -[2024-07-05 13:05:56,111][05794] ConvEncoder: input_channels=3 -[2024-07-05 13:05:56,130][05794] Conv encoder output size: 512 -[2024-07-05 13:05:56,131][05794] Policy head output size: 512 -[2024-07-05 13:05:56,148][05794] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032961_250011648.pth... -[2024-07-05 13:05:56,728][05794] Num frames 100... -[2024-07-05 13:05:56,789][05794] Num frames 200... -[2024-07-05 13:05:56,852][05794] Num frames 300... -[2024-07-05 13:05:56,912][05794] Num frames 400... -[2024-07-05 13:05:56,984][05794] Num frames 500... -[2024-07-05 13:05:57,043][05794] Num frames 600... -[2024-07-05 13:05:57,105][05794] Num frames 700... -[2024-07-05 13:05:57,167][05794] Num frames 800... -[2024-07-05 13:05:57,227][05794] Num frames 900... -[2024-07-05 13:05:57,286][05794] Num frames 1000... -[2024-07-05 13:05:57,347][05794] Num frames 1100... -[2024-07-05 13:05:57,407][05794] Num frames 1200... -[2024-07-05 13:05:57,465][05794] Num frames 1300... -[2024-07-05 13:05:57,565][05794] Avg episode rewards: #0: 33.760, true rewards: #0: 13.760 -[2024-07-05 13:05:57,566][05794] Avg episode reward: 33.760, avg true_objective: 13.760 -[2024-07-05 13:05:57,586][05794] Num frames 1400... -[2024-07-05 13:05:57,650][05794] Num frames 1500... -[2024-07-05 13:05:57,712][05794] Num frames 1600... -[2024-07-05 13:05:57,777][05794] Num frames 1700... -[2024-07-05 13:05:57,842][05794] Num frames 1800... -[2024-07-05 13:05:57,907][05794] Num frames 1900... -[2024-07-05 13:05:57,971][05794] Num frames 2000... -[2024-07-05 13:05:58,035][05794] Num frames 2100... -[2024-07-05 13:05:58,100][05794] Num frames 2200... -[2024-07-05 13:05:58,163][05794] Num frames 2300... -[2024-07-05 13:05:58,226][05794] Num frames 2400... -[2024-07-05 13:05:58,287][05794] Num frames 2500... -[2024-07-05 13:05:58,350][05794] Num frames 2600... -[2024-07-05 13:05:58,412][05794] Num frames 2700... -[2024-07-05 13:05:58,475][05794] Num frames 2800... -[2024-07-05 13:05:58,537][05794] Num frames 2900... -[2024-07-05 13:05:58,598][05794] Num frames 3000... -[2024-07-05 13:05:58,660][05794] Num frames 3100... -[2024-07-05 13:05:58,719][05794] Num frames 3200... -[2024-07-05 13:05:58,781][05794] Num frames 3300... -[2024-07-05 13:05:58,855][05794] Num frames 3400... -[2024-07-05 13:05:58,956][05794] Avg episode rewards: #0: 46.379, true rewards: #0: 17.380 -[2024-07-05 13:05:58,958][05794] Avg episode reward: 46.379, avg true_objective: 17.380 -[2024-07-05 13:05:58,980][05794] Num frames 3500... -[2024-07-05 13:05:59,040][05794] Num frames 3600... -[2024-07-05 13:05:59,099][05794] Num frames 3700... -[2024-07-05 13:05:59,159][05794] Num frames 3800... -[2024-07-05 13:05:59,219][05794] Num frames 3900... -[2024-07-05 13:05:59,281][05794] Num frames 4000... -[2024-07-05 13:05:59,342][05794] Num frames 4100... -[2024-07-05 13:05:59,405][05794] Num frames 4200... -[2024-07-05 13:05:59,479][05794] Num frames 4300... -[2024-07-05 13:05:59,540][05794] Num frames 4400... -[2024-07-05 13:05:59,598][05794] Num frames 4500... -[2024-07-05 13:05:59,659][05794] Num frames 4600... -[2024-07-05 13:05:59,721][05794] Num frames 4700... -[2024-07-05 13:05:59,784][05794] Num frames 4800... -[2024-07-05 13:05:59,846][05794] Num frames 4900... -[2024-07-05 13:05:59,909][05794] Num frames 5000... -[2024-07-05 13:05:59,974][05794] Num frames 5100... -[2024-07-05 13:06:00,036][05794] Num frames 5200... -[2024-07-05 13:06:00,097][05794] Num frames 5300... -[2024-07-05 13:06:00,167][05794] Num frames 5400... -[2024-07-05 13:06:00,232][05794] Num frames 5500... -[2024-07-05 13:06:00,335][05794] Avg episode rewards: #0: 50.919, true rewards: #0: 18.587 -[2024-07-05 13:06:00,336][05794] Avg episode reward: 50.919, avg true_objective: 18.587 -[2024-07-05 13:06:00,357][05794] Num frames 5600... -[2024-07-05 13:06:00,419][05794] Num frames 5700... -[2024-07-05 13:06:00,479][05794] Num frames 5800... -[2024-07-05 13:06:00,540][05794] Num frames 5900... -[2024-07-05 13:06:00,599][05794] Num frames 6000... -[2024-07-05 13:06:00,659][05794] Num frames 6100... -[2024-07-05 13:06:00,718][05794] Num frames 6200... -[2024-07-05 13:06:00,779][05794] Num frames 6300... -[2024-07-05 13:06:00,840][05794] Num frames 6400... -[2024-07-05 13:06:00,901][05794] Num frames 6500... -[2024-07-05 13:06:00,962][05794] Num frames 6600... -[2024-07-05 13:06:01,023][05794] Num frames 6700... -[2024-07-05 13:06:01,082][05794] Num frames 6800... -[2024-07-05 13:06:01,147][05794] Num frames 6900... -[2024-07-05 13:06:01,208][05794] Num frames 7000... -[2024-07-05 13:06:01,270][05794] Num frames 7100... -[2024-07-05 13:06:01,330][05794] Num frames 7200... -[2024-07-05 13:06:01,392][05794] Num frames 7300... -[2024-07-05 13:06:01,452][05794] Num frames 7400... -[2024-07-05 13:06:01,514][05794] Num frames 7500... -[2024-07-05 13:06:01,577][05794] Num frames 7600... -[2024-07-05 13:06:01,678][05794] Avg episode rewards: #0: 52.939, true rewards: #0: 19.190 -[2024-07-05 13:06:01,679][05794] Avg episode reward: 52.939, avg true_objective: 19.190 -[2024-07-05 13:06:01,701][05794] Num frames 7700... -[2024-07-05 13:06:01,766][05794] Num frames 7800... -[2024-07-05 13:06:01,833][05794] Num frames 7900... -[2024-07-05 13:06:01,912][05794] Num frames 8000... -[2024-07-05 13:06:02,013][05794] Num frames 8100... -[2024-07-05 13:06:02,086][05794] Num frames 8200... -[2024-07-05 13:06:02,147][05794] Num frames 8300... -[2024-07-05 13:06:02,213][05794] Num frames 8400... -[2024-07-05 13:06:02,277][05794] Num frames 8500... -[2024-07-05 13:06:02,338][05794] Num frames 8600... -[2024-07-05 13:06:02,400][05794] Num frames 8700... -[2024-07-05 13:06:02,464][05794] Num frames 8800... -[2024-07-05 13:06:02,525][05794] Num frames 8900... -[2024-07-05 13:06:02,587][05794] Num frames 9000... -[2024-07-05 13:06:02,651][05794] Num frames 9100... -[2024-07-05 13:06:02,719][05794] Num frames 9200... -[2024-07-05 13:06:02,781][05794] Num frames 9300... -[2024-07-05 13:06:02,844][05794] Num frames 9400... -[2024-07-05 13:06:02,905][05794] Num frames 9500... -[2024-07-05 13:06:02,967][05794] Num frames 9600... -[2024-07-05 13:06:03,032][05794] Avg episode rewards: #0: 52.633, true rewards: #0: 19.234 -[2024-07-05 13:06:03,033][05794] Avg episode reward: 52.633, avg true_objective: 19.234 -[2024-07-05 13:06:03,090][05794] Num frames 9700... -[2024-07-05 13:06:03,156][05794] Num frames 9800... -[2024-07-05 13:06:03,218][05794] Num frames 9900... -[2024-07-05 13:06:03,278][05794] Num frames 10000... -[2024-07-05 13:06:03,340][05794] Num frames 10100... -[2024-07-05 13:06:03,402][05794] Num frames 10200... -[2024-07-05 13:06:03,465][05794] Num frames 10300... -[2024-07-05 13:06:03,527][05794] Num frames 10400... -[2024-07-05 13:06:03,591][05794] Num frames 10500... -[2024-07-05 13:06:03,651][05794] Num frames 10600... -[2024-07-05 13:06:03,713][05794] Num frames 10700... -[2024-07-05 13:06:03,775][05794] Num frames 10800... -[2024-07-05 13:06:03,838][05794] Num frames 10900... -[2024-07-05 13:06:03,899][05794] Num frames 11000... -[2024-07-05 13:06:03,962][05794] Num frames 11100... -[2024-07-05 13:06:04,026][05794] Num frames 11200... -[2024-07-05 13:06:04,089][05794] Num frames 11300... -[2024-07-05 13:06:04,151][05794] Num frames 11400... -[2024-07-05 13:06:04,214][05794] Num frames 11500... -[2024-07-05 13:06:04,274][05794] Num frames 11600... -[2024-07-05 13:06:04,336][05794] Num frames 11700... -[2024-07-05 13:06:04,401][05794] Avg episode rewards: #0: 53.194, true rewards: #0: 19.528 -[2024-07-05 13:06:04,402][05794] Avg episode reward: 53.194, avg true_objective: 19.528 -[2024-07-05 13:06:04,457][05794] Num frames 11800... -[2024-07-05 13:06:04,518][05794] Num frames 11900... -[2024-07-05 13:06:04,578][05794] Num frames 12000... -[2024-07-05 13:06:04,637][05794] Num frames 12100... -[2024-07-05 13:06:04,699][05794] Num frames 12200... -[2024-07-05 13:06:04,761][05794] Num frames 12300... -[2024-07-05 13:06:04,822][05794] Num frames 12400... -[2024-07-05 13:06:04,885][05794] Num frames 12500... -[2024-07-05 13:06:04,945][05794] Num frames 12600... -[2024-07-05 13:06:05,006][05794] Num frames 12700... -[2024-07-05 13:06:05,069][05794] Num frames 12800... -[2024-07-05 13:06:05,130][05794] Num frames 12900... -[2024-07-05 13:06:05,192][05794] Num frames 13000... -[2024-07-05 13:06:05,254][05794] Num frames 13100... -[2024-07-05 13:06:05,317][05794] Num frames 13200... -[2024-07-05 13:06:05,376][05794] Num frames 13300... -[2024-07-05 13:06:05,437][05794] Num frames 13400... -[2024-07-05 13:06:05,499][05794] Num frames 13500... -[2024-07-05 13:06:05,561][05794] Num frames 13600... -[2024-07-05 13:06:05,625][05794] Num frames 13700... -[2024-07-05 13:06:05,688][05794] Num frames 13800... -[2024-07-05 13:06:05,754][05794] Avg episode rewards: #0: 54.023, true rewards: #0: 19.739 -[2024-07-05 13:06:05,756][05794] Avg episode reward: 54.023, avg true_objective: 19.739 -[2024-07-05 13:06:05,812][05794] Num frames 13900... -[2024-07-05 13:06:05,874][05794] Num frames 14000... -[2024-07-05 13:06:05,934][05794] Num frames 14100... -[2024-07-05 13:06:06,007][05794] Num frames 14200... -[2024-07-05 13:06:06,068][05794] Num frames 14300... -[2024-07-05 13:06:06,129][05794] Num frames 14400... -[2024-07-05 13:06:06,188][05794] Num frames 14500... -[2024-07-05 13:06:06,247][05794] Num frames 14600... -[2024-07-05 13:06:06,307][05794] Num frames 14700... -[2024-07-05 13:06:06,368][05794] Num frames 14800... -[2024-07-05 13:06:06,435][05794] Num frames 14900... -[2024-07-05 13:06:06,496][05794] Num frames 15000... -[2024-07-05 13:06:06,558][05794] Num frames 15100... -[2024-07-05 13:06:06,618][05794] Num frames 15200... -[2024-07-05 13:06:06,681][05794] Num frames 15300... -[2024-07-05 13:06:06,742][05794] Num frames 15400... -[2024-07-05 13:06:06,803][05794] Num frames 15500... -[2024-07-05 13:06:06,863][05794] Num frames 15600... -[2024-07-05 13:06:06,926][05794] Num frames 15700... -[2024-07-05 13:06:06,989][05794] Num frames 15800... -[2024-07-05 13:06:07,052][05794] Num frames 15900... -[2024-07-05 13:06:07,117][05794] Avg episode rewards: #0: 55.020, true rewards: #0: 19.896 -[2024-07-05 13:06:07,119][05794] Avg episode reward: 55.020, avg true_objective: 19.896 -[2024-07-05 13:06:07,174][05794] Num frames 16000... -[2024-07-05 13:06:07,232][05794] Num frames 16100... -[2024-07-05 13:06:07,293][05794] Num frames 16200... -[2024-07-05 13:06:07,354][05794] Num frames 16300... -[2024-07-05 13:06:07,415][05794] Num frames 16400... -[2024-07-05 13:06:07,478][05794] Num frames 16500... -[2024-07-05 13:06:07,540][05794] Num frames 16600... -[2024-07-05 13:06:07,604][05794] Num frames 16700... -[2024-07-05 13:06:07,665][05794] Num frames 16800... -[2024-07-05 13:06:07,727][05794] Num frames 16900... -[2024-07-05 13:06:07,790][05794] Num frames 17000... -[2024-07-05 13:06:07,853][05794] Num frames 17100... -[2024-07-05 13:06:07,917][05794] Num frames 17200... -[2024-07-05 13:06:07,978][05794] Num frames 17300... -[2024-07-05 13:06:08,040][05794] Num frames 17400... -[2024-07-05 13:06:08,101][05794] Num frames 17500... -[2024-07-05 13:06:08,163][05794] Num frames 17600... -[2024-07-05 13:06:08,225][05794] Num frames 17700... -[2024-07-05 13:06:08,298][05794] Num frames 17800... -[2024-07-05 13:06:08,362][05794] Num frames 17900... -[2024-07-05 13:06:08,424][05794] Num frames 18000... -[2024-07-05 13:06:08,489][05794] Avg episode rewards: #0: 55.796, true rewards: #0: 20.019 -[2024-07-05 13:06:08,491][05794] Avg episode reward: 55.796, avg true_objective: 20.019 -[2024-07-05 13:06:08,544][05794] Num frames 18100... -[2024-07-05 13:06:08,606][05794] Num frames 18200... -[2024-07-05 13:06:08,668][05794] Num frames 18300... -[2024-07-05 13:06:08,727][05794] Num frames 18400... -[2024-07-05 13:06:08,790][05794] Num frames 18500... -[2024-07-05 13:06:08,851][05794] Num frames 18600... -[2024-07-05 13:06:08,913][05794] Num frames 18700... -[2024-07-05 13:06:08,972][05794] Num frames 18800... -[2024-07-05 13:06:09,032][05794] Num frames 18900... -[2024-07-05 13:06:09,095][05794] Num frames 19000... -[2024-07-05 13:06:09,157][05794] Num frames 19100... -[2024-07-05 13:06:09,221][05794] Num frames 19200... -[2024-07-05 13:06:09,283][05794] Num frames 19300... -[2024-07-05 13:06:09,346][05794] Num frames 19400... -[2024-07-05 13:06:09,408][05794] Num frames 19500... -[2024-07-05 13:06:09,473][05794] Num frames 19600... -[2024-07-05 13:06:09,533][05794] Num frames 19700... -[2024-07-05 13:06:09,596][05794] Num frames 19800... -[2024-07-05 13:06:09,659][05794] Num frames 19900... -[2024-07-05 13:06:09,756][05794] Avg episode rewards: #0: 55.668, true rewards: #0: 19.969 -[2024-07-05 13:06:09,757][05794] Avg episode reward: 55.668, avg true_objective: 19.969 -[2024-07-05 13:06:29,946][05794] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 13:09:21,072][03445] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 13:09:21,073][03445] Rollout worker 0 uses device cpu -[2024-07-05 13:09:21,074][03445] Rollout worker 1 uses device cpu -[2024-07-05 13:09:21,074][03445] Rollout worker 2 uses device cpu -[2024-07-05 13:09:21,075][03445] Rollout worker 3 uses device cpu -[2024-07-05 13:09:21,075][03445] Rollout worker 4 uses device cpu -[2024-07-05 13:09:21,076][03445] Rollout worker 5 uses device cpu -[2024-07-05 13:09:21,076][03445] Rollout worker 6 uses device cpu -[2024-07-05 13:09:21,076][03445] Rollout worker 7 uses device cpu -[2024-07-05 13:09:21,077][03445] Rollout worker 8 uses device cpu -[2024-07-05 13:09:21,077][03445] Rollout worker 9 uses device cpu -[2024-07-05 13:09:21,077][03445] Rollout worker 10 uses device cpu -[2024-07-05 13:09:21,078][03445] Rollout worker 11 uses device cpu -[2024-07-05 13:09:21,078][03445] Rollout worker 12 uses device cpu -[2024-07-05 13:09:21,079][03445] Rollout worker 13 uses device cpu -[2024-07-05 13:09:21,079][03445] Rollout worker 14 uses device cpu -[2024-07-05 13:09:21,079][03445] Rollout worker 15 uses device cpu -[2024-07-05 13:09:21,175][03445] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:09:21,176][03445] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 13:09:21,274][03445] Starting all processes... -[2024-07-05 13:09:21,275][03445] Starting process learner_proc0 -[2024-07-05 13:09:21,960][03445] Starting all processes... -[2024-07-05 13:09:21,965][03445] Starting process inference_proc0-0 -[2024-07-05 13:09:21,966][03445] Starting process rollout_proc0 -[2024-07-05 13:09:21,966][03445] Starting process rollout_proc1 -[2024-07-05 13:09:21,966][03445] Starting process rollout_proc2 -[2024-07-05 13:09:21,967][03445] Starting process rollout_proc3 -[2024-07-05 13:09:21,968][03445] Starting process rollout_proc4 -[2024-07-05 13:09:21,970][03445] Starting process rollout_proc5 -[2024-07-05 13:09:21,972][03445] Starting process rollout_proc6 -[2024-07-05 13:09:21,973][03445] Starting process rollout_proc7 -[2024-07-05 13:09:21,973][03445] Starting process rollout_proc8 -[2024-07-05 13:09:21,973][03445] Starting process rollout_proc9 -[2024-07-05 13:09:21,974][03445] Starting process rollout_proc10 -[2024-07-05 13:09:21,975][03445] Starting process rollout_proc11 -[2024-07-05 13:09:21,977][03445] Starting process rollout_proc12 -[2024-07-05 13:09:21,977][03445] Starting process rollout_proc13 -[2024-07-05 13:09:21,977][03445] Starting process rollout_proc14 -[2024-07-05 13:09:21,994][03445] Starting process rollout_proc15 -[2024-07-05 13:09:25,520][03957] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:09:25,521][03957] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 13:09:25,622][03957] Num visible devices: 1 -[2024-07-05 13:09:25,700][03957] Setting fixed seed 200 -[2024-07-05 13:09:25,703][03957] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:09:25,703][03957] Initializing actor-critic model on device cuda:0 -[2024-07-05 13:09:25,703][03957] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:09:25,705][03957] RunningMeanStd input shape: (1,) -[2024-07-05 13:09:25,717][03957] ConvEncoder: input_channels=3 -[2024-07-05 13:09:25,843][03978] Worker 0 uses CPU cores [0] -[2024-07-05 13:09:25,869][03980] Worker 2 uses CPU cores [2] -[2024-07-05 13:09:25,983][03977] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:09:25,983][03977] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 13:09:25,983][03957] Conv encoder output size: 512 -[2024-07-05 13:09:25,984][03957] Policy head output size: 512 -[2024-07-05 13:09:26,022][03957] Created Actor Critic model with architecture: -[2024-07-05 13:09:26,023][03957] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (10): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (11): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) + (12): ELU(alpha=1.0) + ) + (mlp_layers): Sequential( + (0): Linear(in_features=4608, out_features=512, bias=True) + (1): ELU(alpha=1.0) ) ) ) @@ -12100,1613 +2351,185 @@ main_loop: 1061.8633 (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-07-05 13:09:26,038][03981] Worker 3 uses CPU cores [3] -[2024-07-05 13:09:26,046][03977] Num visible devices: 1 -[2024-07-05 13:09:26,063][04004] Worker 11 uses CPU cores [11] -[2024-07-05 13:09:26,108][03983] Worker 5 uses CPU cores [5] -[2024-07-05 13:09:26,139][04005] Worker 13 uses CPU cores [13] -[2024-07-05 13:09:26,186][03957] Using optimizer -[2024-07-05 13:09:26,211][03987] Worker 10 uses CPU cores [10] -[2024-07-05 13:09:26,247][04008] Worker 15 uses CPU cores [15] -[2024-07-05 13:09:26,251][03986] Worker 8 uses CPU cores [8] -[2024-07-05 13:09:26,259][03979] Worker 1 uses CPU cores [1] -[2024-07-05 13:09:26,300][04007] Worker 14 uses CPU cores [14] -[2024-07-05 13:09:26,303][03982] Worker 4 uses CPU cores [4] -[2024-07-05 13:09:26,448][03984] Worker 6 uses CPU cores [6] -[2024-07-05 13:09:26,511][04006] Worker 12 uses CPU cores [12] -[2024-07-05 13:09:26,515][03985] Worker 7 uses CPU cores [7] -[2024-07-05 13:09:26,523][03988] Worker 9 uses CPU cores [9] -[2024-07-05 13:09:26,770][03957] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032961_250011648.pth... -[2024-07-05 13:09:26,815][03957] Loading model from checkpoint -[2024-07-05 13:09:26,816][03957] Loaded experiment state at self.train_step=32961, self.env_steps=250011648 -[2024-07-05 13:09:26,816][03957] Initialized policy 0 weights for model version 32961 -[2024-07-05 13:09:26,817][03957] LearnerWorker_p0 finished initialization! -[2024-07-05 13:09:26,817][03957] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:09:26,879][03977] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:09:26,879][03977] RunningMeanStd input shape: (1,) -[2024-07-05 13:09:26,887][03977] ConvEncoder: input_channels=3 -[2024-07-05 13:09:26,941][03977] Conv encoder output size: 512 -[2024-07-05 13:09:26,941][03977] Policy head output size: 512 -[2024-07-05 13:09:26,973][03445] Inference worker 0-0 is ready! -[2024-07-05 13:09:26,974][03445] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 13:09:27,016][04004] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,021][03980] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,022][04006] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,022][03979] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,023][03988] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,024][03985] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,024][03983] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,028][03986] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,028][03982] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,029][03978] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,030][03984] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,030][04005] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,031][03981] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,036][04008] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,040][04007] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,052][03987] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:09:27,743][03981] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,743][03978] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,743][03986] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,743][03984] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,743][04005] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,743][04006] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,743][03982] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,908][03981] Decorrelating experience for 32 frames... -[2024-07-05 13:09:27,916][03982] Decorrelating experience for 32 frames... -[2024-07-05 13:09:27,920][04004] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,947][03978] Decorrelating experience for 32 frames... -[2024-07-05 13:09:27,963][04006] Decorrelating experience for 32 frames... -[2024-07-05 13:09:27,963][04005] Decorrelating experience for 32 frames... -[2024-07-05 13:09:27,967][04008] Decorrelating experience for 0 frames... -[2024-07-05 13:09:27,994][03979] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,001][04007] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,086][03981] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,092][03988] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,131][04008] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,131][03987] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,151][04006] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,157][03984] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,177][03985] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,281][03988] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,310][03981] Decorrelating experience for 96 frames... -[2024-07-05 13:09:28,326][03978] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,328][04004] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,360][04005] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,385][03979] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,401][04006] Decorrelating experience for 96 frames... -[2024-07-05 13:09:28,401][03980] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,425][03986] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,503][03983] Decorrelating experience for 0 frames... -[2024-07-05 13:09:28,531][03988] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,550][03985] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,582][04007] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,584][03978] Decorrelating experience for 96 frames... -[2024-07-05 13:09:28,590][03984] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,647][03986] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,661][04005] Decorrelating experience for 96 frames... -[2024-07-05 13:09:28,698][03983] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,708][03981] Decorrelating experience for 128 frames... -[2024-07-05 13:09:28,739][04006] Decorrelating experience for 128 frames... -[2024-07-05 13:09:28,772][04004] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,782][03987] Decorrelating experience for 32 frames... -[2024-07-05 13:09:28,798][03979] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,806][03984] Decorrelating experience for 96 frames... -[2024-07-05 13:09:28,893][03988] Decorrelating experience for 96 frames... -[2024-07-05 13:09:28,913][03983] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,971][03987] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,977][03978] Decorrelating experience for 128 frames... -[2024-07-05 13:09:28,983][04007] Decorrelating experience for 64 frames... -[2024-07-05 13:09:28,999][03982] Decorrelating experience for 64 frames... -[2024-07-05 13:09:29,011][04005] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,092][04008] Decorrelating experience for 64 frames... -[2024-07-05 13:09:29,103][03986] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,141][03985] Decorrelating experience for 64 frames... -[2024-07-05 13:09:29,143][03979] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,176][04006] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,256][03987] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,284][03982] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,310][04008] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,364][03978] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,382][04007] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,386][03988] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,434][03986] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,465][03984] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,531][03985] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,562][04006] Decorrelating experience for 192 frames... -[2024-07-05 13:09:29,580][03981] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,618][03988] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,620][04004] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,647][04007] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,667][03987] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,689][03983] Decorrelating experience for 96 frames... -[2024-07-05 13:09:29,743][03978] Decorrelating experience for 192 frames... -[2024-07-05 13:09:29,786][03984] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,804][03982] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,814][03986] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,846][04007] Decorrelating experience for 160 frames... -[2024-07-05 13:09:29,850][03981] Decorrelating experience for 192 frames... -[2024-07-05 13:09:29,976][03983] Decorrelating experience for 128 frames... -[2024-07-05 13:09:29,995][03978] Decorrelating experience for 224 frames... -[2024-07-05 13:09:29,997][03988] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,014][03987] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,070][03979] Decorrelating experience for 128 frames... -[2024-07-05 13:09:30,071][03981] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,103][04004] Decorrelating experience for 128 frames... -[2024-07-05 13:09:30,201][04005] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,217][04007] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,223][03988] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,235][03982] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,254][03983] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,277][03987] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,288][03984] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,429][04005] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,443][03985] Decorrelating experience for 128 frames... -[2024-07-05 13:09:30,453][04007] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,453][03986] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,465][04004] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,483][03979] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,494][03982] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,605][03980] Decorrelating experience for 32 frames... -[2024-07-05 13:09:30,629][03983] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,656][04005] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,675][03985] Decorrelating experience for 160 frames... -[2024-07-05 13:09:30,681][03986] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,717][04004] Decorrelating experience for 192 frames... -[2024-07-05 13:09:30,806][03980] Decorrelating experience for 64 frames... -[2024-07-05 13:09:30,845][03987] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,907][03982] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,918][03983] Decorrelating experience for 224 frames... -[2024-07-05 13:09:30,961][03445] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 250011648. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 13:09:30,963][03445] Avg episode reward: [(0, '0.800')] -[2024-07-05 13:09:31,017][04004] Decorrelating experience for 224 frames... -[2024-07-05 13:09:31,032][03985] Decorrelating experience for 192 frames... -[2024-07-05 13:09:31,035][03980] Decorrelating experience for 96 frames... -[2024-07-05 13:09:31,065][03979] Decorrelating experience for 192 frames... -[2024-07-05 13:09:31,340][03979] Decorrelating experience for 224 frames... -[2024-07-05 13:09:31,357][03985] Decorrelating experience for 224 frames... -[2024-07-05 13:09:31,360][03984] Decorrelating experience for 224 frames... -[2024-07-05 13:09:31,468][03957] Signal inference workers to stop experience collection... -[2024-07-05 13:09:31,474][03977] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 13:09:31,548][04006] Decorrelating experience for 224 frames... -[2024-07-05 13:09:31,740][03980] Decorrelating experience for 128 frames... -[2024-07-05 13:09:31,930][03980] Decorrelating experience for 160 frames... -[2024-07-05 13:09:31,930][04008] Decorrelating experience for 128 frames... -[2024-07-05 13:09:32,121][04008] Decorrelating experience for 160 frames... -[2024-07-05 13:09:32,141][03980] Decorrelating experience for 192 frames... -[2024-07-05 13:09:32,318][04008] Decorrelating experience for 192 frames... -[2024-07-05 13:09:32,356][03980] Decorrelating experience for 224 frames... -[2024-07-05 13:09:32,531][04008] Decorrelating experience for 224 frames... -[2024-07-05 13:09:33,046][03957] Signal inference workers to resume experience collection... -[2024-07-05 13:09:33,046][03977] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 13:09:34,901][03977] Updated weights for policy 0, policy_version 32971 (0.0096) -[2024-07-05 13:09:35,961][03445] Fps is (10 sec: 26214.3, 60 sec: 26214.3, 300 sec: 26214.3). Total num frames: 250142720. Throughput: 0: 1069.6. Samples: 5348. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:09:35,963][03445] Avg episode reward: [(0, '6.614')] -[2024-07-05 13:09:36,659][03977] Updated weights for policy 0, policy_version 32981 (0.0008) -[2024-07-05 13:09:38,409][03977] Updated weights for policy 0, policy_version 32991 (0.0008) -[2024-07-05 13:09:40,120][03977] Updated weights for policy 0, policy_version 33001 (0.0008) -[2024-07-05 13:09:40,961][03445] Fps is (10 sec: 36044.6, 60 sec: 36044.6, 300 sec: 36044.6). Total num frames: 250372096. Throughput: 0: 7047.2. Samples: 70472. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:09:40,963][03445] Avg episode reward: [(0, '52.896')] -[2024-07-05 13:09:41,169][03445] Heartbeat connected on Batcher_0 -[2024-07-05 13:09:41,172][03445] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 13:09:41,180][03445] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 13:09:41,183][03445] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 13:09:41,187][03445] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 13:09:41,191][03445] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 13:09:41,192][03445] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 13:09:41,196][03445] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 13:09:41,198][03445] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 13:09:41,202][03445] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 13:09:41,206][03445] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 13:09:41,207][03445] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 13:09:41,258][03445] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 13:09:41,259][03445] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 13:09:41,265][03445] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 13:09:41,266][03445] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 13:09:41,270][03445] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 13:09:41,278][03445] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 13:09:41,279][03445] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 13:09:41,830][03977] Updated weights for policy 0, policy_version 33011 (0.0009) -[2024-07-05 13:09:43,536][03977] Updated weights for policy 0, policy_version 33021 (0.0008) -[2024-07-05 13:09:45,269][03977] Updated weights for policy 0, policy_version 33031 (0.0008) -[2024-07-05 13:09:45,961][03445] Fps is (10 sec: 47513.5, 60 sec: 40413.8, 300 sec: 40413.8). Total num frames: 250617856. Throughput: 0: 9513.3. Samples: 142700. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:09:45,963][03445] Avg episode reward: [(0, '53.032')] -[2024-07-05 13:09:46,930][03977] Updated weights for policy 0, policy_version 33041 (0.0008) -[2024-07-05 13:09:48,611][03977] Updated weights for policy 0, policy_version 33051 (0.0008) -[2024-07-05 13:09:50,342][03977] Updated weights for policy 0, policy_version 33061 (0.0008) -[2024-07-05 13:09:50,961][03445] Fps is (10 sec: 48332.7, 60 sec: 42188.7, 300 sec: 42188.7). Total num frames: 250855424. Throughput: 0: 8967.4. Samples: 179348. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:09:50,963][03445] Avg episode reward: [(0, '55.614')] -[2024-07-05 13:09:50,964][03957] Saving new best policy, reward=55.614! -[2024-07-05 13:09:52,118][03977] Updated weights for policy 0, policy_version 33071 (0.0008) -[2024-07-05 13:09:53,990][03977] Updated weights for policy 0, policy_version 33081 (0.0009) -[2024-07-05 13:09:55,750][03977] Updated weights for policy 0, policy_version 33091 (0.0010) -[2024-07-05 13:09:55,961][03445] Fps is (10 sec: 46694.3, 60 sec: 42926.0, 300 sec: 42926.0). Total num frames: 251084800. Throughput: 0: 9941.9. Samples: 248548. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 13:09:55,963][03445] Avg episode reward: [(0, '53.803')] -[2024-07-05 13:09:57,532][03977] Updated weights for policy 0, policy_version 33101 (0.0012) -[2024-07-05 13:09:59,306][03977] Updated weights for policy 0, policy_version 33111 (0.0008) -[2024-07-05 13:10:00,961][03445] Fps is (10 sec: 45875.4, 60 sec: 43417.5, 300 sec: 43417.5). Total num frames: 251314176. Throughput: 0: 10586.9. Samples: 317608. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 13:10:00,963][03445] Avg episode reward: [(0, '54.106')] -[2024-07-05 13:10:01,069][03977] Updated weights for policy 0, policy_version 33121 (0.0010) -[2024-07-05 13:10:02,856][03977] Updated weights for policy 0, policy_version 33131 (0.0008) -[2024-07-05 13:10:04,583][03977] Updated weights for policy 0, policy_version 33141 (0.0008) -[2024-07-05 13:10:05,962][03445] Fps is (10 sec: 45874.3, 60 sec: 43768.4, 300 sec: 43768.4). Total num frames: 251543552. Throughput: 0: 10063.0. Samples: 352208. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 13:10:05,963][03445] Avg episode reward: [(0, '51.998')] -[2024-07-05 13:10:06,370][03977] Updated weights for policy 0, policy_version 33151 (0.0008) -[2024-07-05 13:10:08,065][03977] Updated weights for policy 0, policy_version 33161 (0.0008) -[2024-07-05 13:10:09,738][03977] Updated weights for policy 0, policy_version 33171 (0.0008) -[2024-07-05 13:10:10,961][03445] Fps is (10 sec: 47514.3, 60 sec: 44441.7, 300 sec: 44441.7). Total num frames: 251789312. Throughput: 0: 10587.1. Samples: 423484. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) -[2024-07-05 13:10:10,962][03445] Avg episode reward: [(0, '52.108')] -[2024-07-05 13:10:11,451][03977] Updated weights for policy 0, policy_version 33181 (0.0008) -[2024-07-05 13:10:13,170][03977] Updated weights for policy 0, policy_version 33191 (0.0008) -[2024-07-05 13:10:14,866][03977] Updated weights for policy 0, policy_version 33201 (0.0010) -[2024-07-05 13:10:15,962][03445] Fps is (10 sec: 48332.7, 60 sec: 44782.7, 300 sec: 44782.7). Total num frames: 252026880. Throughput: 0: 11017.2. Samples: 495776. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:10:15,963][03445] Avg episode reward: [(0, '51.924')] -[2024-07-05 13:10:16,532][03977] Updated weights for policy 0, policy_version 33211 (0.0011) -[2024-07-05 13:10:18,196][03977] Updated weights for policy 0, policy_version 33221 (0.0009) -[2024-07-05 13:10:19,910][03977] Updated weights for policy 0, policy_version 33231 (0.0007) -[2024-07-05 13:10:20,961][03445] Fps is (10 sec: 48332.1, 60 sec: 45219.8, 300 sec: 45219.8). Total num frames: 252272640. Throughput: 0: 11707.2. Samples: 532172. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:10:20,962][03445] Avg episode reward: [(0, '51.750')] -[2024-07-05 13:10:21,595][03977] Updated weights for policy 0, policy_version 33241 (0.0008) -[2024-07-05 13:10:23,301][03977] Updated weights for policy 0, policy_version 33251 (0.0009) -[2024-07-05 13:10:25,017][03977] Updated weights for policy 0, policy_version 33261 (0.0011) -[2024-07-05 13:10:25,962][03445] Fps is (10 sec: 48333.5, 60 sec: 45428.3, 300 sec: 45428.3). Total num frames: 252510208. Throughput: 0: 11868.2. Samples: 604540. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:10:25,963][03445] Avg episode reward: [(0, '52.857')] -[2024-07-05 13:10:26,718][03977] Updated weights for policy 0, policy_version 33271 (0.0008) -[2024-07-05 13:10:28,427][03977] Updated weights for policy 0, policy_version 33281 (0.0008) -[2024-07-05 13:10:30,101][03977] Updated weights for policy 0, policy_version 33291 (0.0008) -[2024-07-05 13:10:30,961][03445] Fps is (10 sec: 48332.6, 60 sec: 45738.6, 300 sec: 45738.6). Total num frames: 252755968. Throughput: 0: 11872.6. Samples: 676968. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:10:30,962][03445] Avg episode reward: [(0, '51.220')] -[2024-07-05 13:10:31,794][03977] Updated weights for policy 0, policy_version 33301 (0.0011) -[2024-07-05 13:10:33,478][03977] Updated weights for policy 0, policy_version 33311 (0.0009) -[2024-07-05 13:10:35,170][03977] Updated weights for policy 0, policy_version 33321 (0.0009) -[2024-07-05 13:10:35,961][03445] Fps is (10 sec: 48333.1, 60 sec: 47513.6, 300 sec: 45875.2). Total num frames: 252993536. Throughput: 0: 11865.3. Samples: 713284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:10:35,962][03445] Avg episode reward: [(0, '52.264')] -[2024-07-05 13:10:36,857][03977] Updated weights for policy 0, policy_version 33331 (0.0008) -[2024-07-05 13:10:38,582][03977] Updated weights for policy 0, policy_version 33341 (0.0011) -[2024-07-05 13:10:40,248][03977] Updated weights for policy 0, policy_version 33351 (0.0010) -[2024-07-05 13:10:40,961][03445] Fps is (10 sec: 48333.0, 60 sec: 47786.7, 300 sec: 46109.2). Total num frames: 253239296. Throughput: 0: 11945.3. Samples: 786084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:10:40,963][03445] Avg episode reward: [(0, '50.570')] -[2024-07-05 13:10:41,948][03977] Updated weights for policy 0, policy_version 33361 (0.0008) -[2024-07-05 13:10:43,647][03977] Updated weights for policy 0, policy_version 33371 (0.0008) -[2024-07-05 13:10:45,327][03977] Updated weights for policy 0, policy_version 33381 (0.0010) -[2024-07-05 13:10:45,962][03445] Fps is (10 sec: 48332.4, 60 sec: 47650.1, 300 sec: 46202.8). Total num frames: 253476864. Throughput: 0: 12017.9. Samples: 858416. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:10:45,963][03445] Avg episode reward: [(0, '51.278')] -[2024-07-05 13:10:47,027][03977] Updated weights for policy 0, policy_version 33391 (0.0008) -[2024-07-05 13:10:48,702][03977] Updated weights for policy 0, policy_version 33401 (0.0008) -[2024-07-05 13:10:50,390][03977] Updated weights for policy 0, policy_version 33411 (0.0008) -[2024-07-05 13:10:50,961][03445] Fps is (10 sec: 48332.8, 60 sec: 47786.7, 300 sec: 46387.2). Total num frames: 253722624. Throughput: 0: 12062.6. Samples: 895024. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:10:50,962][03445] Avg episode reward: [(0, '49.629')] -[2024-07-05 13:10:52,070][03977] Updated weights for policy 0, policy_version 33421 (0.0007) -[2024-07-05 13:10:53,774][03977] Updated weights for policy 0, policy_version 33431 (0.0008) -[2024-07-05 13:10:55,482][03977] Updated weights for policy 0, policy_version 33441 (0.0008) -[2024-07-05 13:10:55,961][03445] Fps is (10 sec: 49152.6, 60 sec: 48059.8, 300 sec: 46549.8). Total num frames: 253968384. Throughput: 0: 12091.9. Samples: 967620. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:10:55,962][03445] Avg episode reward: [(0, '51.937')] -[2024-07-05 13:10:57,154][03977] Updated weights for policy 0, policy_version 33451 (0.0008) -[2024-07-05 13:10:58,863][03977] Updated weights for policy 0, policy_version 33461 (0.0007) -[2024-07-05 13:11:00,559][03977] Updated weights for policy 0, policy_version 33471 (0.0008) -[2024-07-05 13:11:00,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 46603.4). Total num frames: 254205952. Throughput: 0: 12107.2. Samples: 1040596. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:11:00,963][03445] Avg episode reward: [(0, '51.334')] -[2024-07-05 13:11:02,228][03977] Updated weights for policy 0, policy_version 33481 (0.0007) -[2024-07-05 13:11:03,943][03977] Updated weights for policy 0, policy_version 33491 (0.0009) -[2024-07-05 13:11:05,625][03977] Updated weights for policy 0, policy_version 33501 (0.0007) -[2024-07-05 13:11:05,962][03445] Fps is (10 sec: 48332.2, 60 sec: 48469.4, 300 sec: 46737.4). Total num frames: 254451712. Throughput: 0: 12100.4. Samples: 1076692. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:11:05,962][03445] Avg episode reward: [(0, '51.186')] -[2024-07-05 13:11:07,277][03977] Updated weights for policy 0, policy_version 33511 (0.0008) -[2024-07-05 13:11:08,965][03977] Updated weights for policy 0, policy_version 33521 (0.0010) -[2024-07-05 13:11:10,674][03977] Updated weights for policy 0, policy_version 33531 (0.0008) -[2024-07-05 13:11:10,961][03445] Fps is (10 sec: 48333.1, 60 sec: 48332.7, 300 sec: 46776.3). Total num frames: 254689280. Throughput: 0: 12110.0. Samples: 1149488. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:11:10,962][03445] Avg episode reward: [(0, '50.280')] -[2024-07-05 13:11:12,359][03977] Updated weights for policy 0, policy_version 33541 (0.0010) -[2024-07-05 13:11:14,040][03977] Updated weights for policy 0, policy_version 33551 (0.0008) -[2024-07-05 13:11:15,730][03977] Updated weights for policy 0, policy_version 33561 (0.0010) -[2024-07-05 13:11:15,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.5, 300 sec: 46889.4). Total num frames: 254935040. Throughput: 0: 12118.3. Samples: 1222292. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:11:15,963][03445] Avg episode reward: [(0, '53.016')] -[2024-07-05 13:11:15,967][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000033562_254935040.pth... -[2024-07-05 13:11:16,029][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032354_245039104.pth -[2024-07-05 13:11:17,463][03977] Updated weights for policy 0, policy_version 33571 (0.0009) -[2024-07-05 13:11:19,145][03977] Updated weights for policy 0, policy_version 33581 (0.0008) -[2024-07-05 13:11:20,798][03977] Updated weights for policy 0, policy_version 33591 (0.0008) -[2024-07-05 13:11:20,961][03445] Fps is (10 sec: 48332.4, 60 sec: 48332.8, 300 sec: 46917.8). Total num frames: 255172608. Throughput: 0: 12115.6. Samples: 1258484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:11:20,962][03445] Avg episode reward: [(0, '51.789')] -[2024-07-05 13:11:22,500][03977] Updated weights for policy 0, policy_version 33601 (0.0008) -[2024-07-05 13:11:24,174][03977] Updated weights for policy 0, policy_version 33611 (0.0009) -[2024-07-05 13:11:25,874][03977] Updated weights for policy 0, policy_version 33621 (0.0008) -[2024-07-05 13:11:25,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.5, 300 sec: 47015.0). Total num frames: 255418368. Throughput: 0: 12112.8. Samples: 1331160. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:11:25,962][03445] Avg episode reward: [(0, '50.936')] -[2024-07-05 13:11:27,599][03977] Updated weights for policy 0, policy_version 33631 (0.0008) -[2024-07-05 13:11:29,272][03977] Updated weights for policy 0, policy_version 33641 (0.0008) -[2024-07-05 13:11:30,961][03445] Fps is (10 sec: 48333.3, 60 sec: 48332.9, 300 sec: 47035.8). Total num frames: 255655936. Throughput: 0: 12115.5. Samples: 1403612. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:11:30,962][03445] Avg episode reward: [(0, '50.055')] -[2024-07-05 13:11:30,981][03977] Updated weights for policy 0, policy_version 33651 (0.0008) -[2024-07-05 13:11:32,725][03977] Updated weights for policy 0, policy_version 33661 (0.0007) -[2024-07-05 13:11:34,419][03977] Updated weights for policy 0, policy_version 33671 (0.0009) -[2024-07-05 13:11:35,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.4, 300 sec: 47120.4). Total num frames: 255901696. Throughput: 0: 12100.7. Samples: 1439556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:11:35,962][03445] Avg episode reward: [(0, '51.230')] -[2024-07-05 13:11:36,076][03977] Updated weights for policy 0, policy_version 33681 (0.0010) -[2024-07-05 13:11:37,766][03977] Updated weights for policy 0, policy_version 33691 (0.0007) -[2024-07-05 13:11:39,440][03977] Updated weights for policy 0, policy_version 33701 (0.0008) -[2024-07-05 13:11:40,962][03445] Fps is (10 sec: 48332.1, 60 sec: 48332.7, 300 sec: 47135.5). Total num frames: 256139264. Throughput: 0: 12099.6. Samples: 1512104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:11:40,962][03445] Avg episode reward: [(0, '50.167')] -[2024-07-05 13:11:41,190][03977] Updated weights for policy 0, policy_version 33711 (0.0008) -[2024-07-05 13:11:42,842][03977] Updated weights for policy 0, policy_version 33721 (0.0009) -[2024-07-05 13:11:44,567][03977] Updated weights for policy 0, policy_version 33731 (0.0008) -[2024-07-05 13:11:45,961][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 47210.2). Total num frames: 256385024. Throughput: 0: 12090.9. Samples: 1584688. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:11:45,963][03445] Avg episode reward: [(0, '54.106')] -[2024-07-05 13:11:46,277][03977] Updated weights for policy 0, policy_version 33741 (0.0008) -[2024-07-05 13:11:47,957][03977] Updated weights for policy 0, policy_version 33751 (0.0008) -[2024-07-05 13:11:49,634][03977] Updated weights for policy 0, policy_version 33761 (0.0009) -[2024-07-05 13:11:50,961][03445] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 47221.0). Total num frames: 256622592. Throughput: 0: 12088.0. Samples: 1620652. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:11:50,962][03445] Avg episode reward: [(0, '51.384')] -[2024-07-05 13:11:51,359][03977] Updated weights for policy 0, policy_version 33771 (0.0008) -[2024-07-05 13:11:53,025][03977] Updated weights for policy 0, policy_version 33781 (0.0007) -[2024-07-05 13:11:54,711][03977] Updated weights for policy 0, policy_version 33791 (0.0008) -[2024-07-05 13:11:55,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 47287.6). Total num frames: 256868352. Throughput: 0: 12089.3. Samples: 1693508. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:11:55,963][03445] Avg episode reward: [(0, '53.338')] -[2024-07-05 13:11:56,401][03977] Updated weights for policy 0, policy_version 33801 (0.0008) -[2024-07-05 13:11:58,117][03977] Updated weights for policy 0, policy_version 33811 (0.0008) -[2024-07-05 13:11:59,821][03977] Updated weights for policy 0, policy_version 33821 (0.0008) -[2024-07-05 13:12:00,961][03445] Fps is (10 sec: 48333.1, 60 sec: 48332.9, 300 sec: 47295.2). Total num frames: 257105920. Throughput: 0: 12080.1. Samples: 1765896. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:12:00,962][03445] Avg episode reward: [(0, '50.978')] -[2024-07-05 13:12:01,534][03977] Updated weights for policy 0, policy_version 33831 (0.0009) -[2024-07-05 13:12:03,210][03977] Updated weights for policy 0, policy_version 33841 (0.0007) -[2024-07-05 13:12:04,847][03977] Updated weights for policy 0, policy_version 33851 (0.0007) -[2024-07-05 13:12:05,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48332.9, 300 sec: 47355.0). Total num frames: 257351680. Throughput: 0: 12086.2. Samples: 1802364. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:05,962][03445] Avg episode reward: [(0, '54.175')] -[2024-07-05 13:12:06,618][03977] Updated weights for policy 0, policy_version 33861 (0.0008) -[2024-07-05 13:12:08,267][03977] Updated weights for policy 0, policy_version 33871 (0.0007) -[2024-07-05 13:12:09,937][03977] Updated weights for policy 0, policy_version 33881 (0.0012) -[2024-07-05 13:12:10,961][03445] Fps is (10 sec: 49151.8, 60 sec: 48469.3, 300 sec: 47411.2). Total num frames: 257597440. Throughput: 0: 12078.8. Samples: 1874704. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:10,962][03445] Avg episode reward: [(0, '51.403')] -[2024-07-05 13:12:11,613][03977] Updated weights for policy 0, policy_version 33891 (0.0008) -[2024-07-05 13:12:13,334][03977] Updated weights for policy 0, policy_version 33901 (0.0007) -[2024-07-05 13:12:15,047][03977] Updated weights for policy 0, policy_version 33911 (0.0008) -[2024-07-05 13:12:15,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 47414.3). Total num frames: 257835008. Throughput: 0: 12091.9. Samples: 1947748. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:15,963][03445] Avg episode reward: [(0, '52.204')] -[2024-07-05 13:12:16,710][03977] Updated weights for policy 0, policy_version 33921 (0.0008) -[2024-07-05 13:12:18,418][03977] Updated weights for policy 0, policy_version 33931 (0.0008) -[2024-07-05 13:12:20,102][03977] Updated weights for policy 0, policy_version 33941 (0.0008) -[2024-07-05 13:12:20,961][03445] Fps is (10 sec: 48332.3, 60 sec: 48469.3, 300 sec: 47465.4). Total num frames: 258080768. Throughput: 0: 12100.3. Samples: 1984072. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:20,963][03445] Avg episode reward: [(0, '53.479')] -[2024-07-05 13:12:21,804][03977] Updated weights for policy 0, policy_version 33951 (0.0008) -[2024-07-05 13:12:23,496][03977] Updated weights for policy 0, policy_version 33961 (0.0010) -[2024-07-05 13:12:25,187][03977] Updated weights for policy 0, policy_version 33971 (0.0013) -[2024-07-05 13:12:25,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48332.7, 300 sec: 47466.8). Total num frames: 258318336. Throughput: 0: 12100.2. Samples: 2056612. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:25,962][03445] Avg episode reward: [(0, '51.846')] -[2024-07-05 13:12:26,875][03977] Updated weights for policy 0, policy_version 33981 (0.0010) -[2024-07-05 13:12:28,566][03977] Updated weights for policy 0, policy_version 33991 (0.0008) -[2024-07-05 13:12:30,253][03977] Updated weights for policy 0, policy_version 34001 (0.0007) -[2024-07-05 13:12:30,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 47513.6). Total num frames: 258564096. Throughput: 0: 12094.7. Samples: 2128948. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:30,962][03445] Avg episode reward: [(0, '50.168')] -[2024-07-05 13:12:31,939][03977] Updated weights for policy 0, policy_version 34011 (0.0008) -[2024-07-05 13:12:33,672][03977] Updated weights for policy 0, policy_version 34021 (0.0008) -[2024-07-05 13:12:35,394][03977] Updated weights for policy 0, policy_version 34031 (0.0008) -[2024-07-05 13:12:35,962][03445] Fps is (10 sec: 48332.2, 60 sec: 48332.7, 300 sec: 47513.6). Total num frames: 258801664. Throughput: 0: 12106.0. Samples: 2165424. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:35,963][03445] Avg episode reward: [(0, '50.240')] -[2024-07-05 13:12:37,084][03977] Updated weights for policy 0, policy_version 34041 (0.0008) -[2024-07-05 13:12:38,774][03977] Updated weights for policy 0, policy_version 34051 (0.0007) -[2024-07-05 13:12:40,447][03977] Updated weights for policy 0, policy_version 34061 (0.0009) -[2024-07-05 13:12:40,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.4, 300 sec: 47556.7). Total num frames: 259047424. Throughput: 0: 12096.1. Samples: 2237832. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:40,962][03445] Avg episode reward: [(0, '49.736')] -[2024-07-05 13:12:42,110][03977] Updated weights for policy 0, policy_version 34071 (0.0008) -[2024-07-05 13:12:43,839][03977] Updated weights for policy 0, policy_version 34081 (0.0008) -[2024-07-05 13:12:45,562][03977] Updated weights for policy 0, policy_version 34091 (0.0011) -[2024-07-05 13:12:45,961][03445] Fps is (10 sec: 48333.6, 60 sec: 48332.9, 300 sec: 47555.6). Total num frames: 259284992. Throughput: 0: 12099.1. Samples: 2310356. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:12:45,962][03445] Avg episode reward: [(0, '54.397')] -[2024-07-05 13:12:47,264][03977] Updated weights for policy 0, policy_version 34101 (0.0008) -[2024-07-05 13:12:48,932][03977] Updated weights for policy 0, policy_version 34111 (0.0010) -[2024-07-05 13:12:50,600][03977] Updated weights for policy 0, policy_version 34121 (0.0008) -[2024-07-05 13:12:50,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 47595.5). Total num frames: 259530752. Throughput: 0: 12089.9. Samples: 2346408. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:12:50,963][03445] Avg episode reward: [(0, '52.557')] -[2024-07-05 13:12:52,289][03977] Updated weights for policy 0, policy_version 34131 (0.0010) -[2024-07-05 13:12:53,967][03977] Updated weights for policy 0, policy_version 34141 (0.0008) -[2024-07-05 13:12:55,688][03977] Updated weights for policy 0, policy_version 34151 (0.0008) -[2024-07-05 13:12:55,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 47593.5). Total num frames: 259768320. Throughput: 0: 12091.8. Samples: 2418836. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:12:55,963][03445] Avg episode reward: [(0, '51.320')] -[2024-07-05 13:12:57,374][03977] Updated weights for policy 0, policy_version 34161 (0.0007) -[2024-07-05 13:12:59,114][03977] Updated weights for policy 0, policy_version 34171 (0.0008) -[2024-07-05 13:13:00,784][03977] Updated weights for policy 0, policy_version 34181 (0.0009) -[2024-07-05 13:13:00,962][03445] Fps is (10 sec: 48331.7, 60 sec: 48469.1, 300 sec: 47630.6). Total num frames: 260014080. Throughput: 0: 12080.8. Samples: 2491388. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:13:00,963][03445] Avg episode reward: [(0, '53.629')] -[2024-07-05 13:13:02,473][03977] Updated weights for policy 0, policy_version 34191 (0.0008) -[2024-07-05 13:13:04,135][03977] Updated weights for policy 0, policy_version 34201 (0.0010) -[2024-07-05 13:13:05,887][03977] Updated weights for policy 0, policy_version 34211 (0.0008) -[2024-07-05 13:13:05,962][03445] Fps is (10 sec: 48331.8, 60 sec: 48332.7, 300 sec: 47627.9). Total num frames: 260251648. Throughput: 0: 12083.3. Samples: 2527824. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:13:05,963][03445] Avg episode reward: [(0, '52.472')] -[2024-07-05 13:13:07,532][03977] Updated weights for policy 0, policy_version 34221 (0.0009) -[2024-07-05 13:13:09,228][03977] Updated weights for policy 0, policy_version 34231 (0.0008) -[2024-07-05 13:13:10,928][03977] Updated weights for policy 0, policy_version 34241 (0.0008) -[2024-07-05 13:13:10,961][03445] Fps is (10 sec: 48334.1, 60 sec: 48332.8, 300 sec: 47662.5). Total num frames: 260497408. Throughput: 0: 12079.5. Samples: 2600188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:13:10,962][03445] Avg episode reward: [(0, '51.952')] -[2024-07-05 13:13:12,648][03977] Updated weights for policy 0, policy_version 34251 (0.0008) -[2024-07-05 13:13:14,329][03977] Updated weights for policy 0, policy_version 34261 (0.0008) -[2024-07-05 13:13:15,961][03445] Fps is (10 sec: 48333.7, 60 sec: 48332.8, 300 sec: 47659.2). Total num frames: 260734976. Throughput: 0: 12095.3. Samples: 2673236. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:13:15,963][03445] Avg episode reward: [(0, '50.334')] -[2024-07-05 13:13:15,967][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000034270_260734976.pth... -[2024-07-05 13:13:16,037][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000032961_250011648.pth -[2024-07-05 13:13:16,099][03977] Updated weights for policy 0, policy_version 34271 (0.0011) -[2024-07-05 13:13:17,763][03977] Updated weights for policy 0, policy_version 34281 (0.0008) -[2024-07-05 13:13:19,439][03977] Updated weights for policy 0, policy_version 34291 (0.0007) -[2024-07-05 13:13:20,962][03445] Fps is (10 sec: 47512.9, 60 sec: 48196.2, 300 sec: 47656.0). Total num frames: 260972544. Throughput: 0: 12074.1. Samples: 2708760. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:13:20,962][03445] Avg episode reward: [(0, '51.210')] -[2024-07-05 13:13:21,116][03977] Updated weights for policy 0, policy_version 34301 (0.0009) -[2024-07-05 13:13:22,816][03977] Updated weights for policy 0, policy_version 34311 (0.0008) -[2024-07-05 13:13:24,516][03977] Updated weights for policy 0, policy_version 34321 (0.0007) -[2024-07-05 13:13:25,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 47687.9). Total num frames: 261218304. Throughput: 0: 12077.2. Samples: 2781304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:13:25,962][03445] Avg episode reward: [(0, '52.094')] -[2024-07-05 13:13:26,231][03977] Updated weights for policy 0, policy_version 34331 (0.0008) -[2024-07-05 13:13:27,971][03977] Updated weights for policy 0, policy_version 34341 (0.0009) -[2024-07-05 13:13:29,670][03977] Updated weights for policy 0, policy_version 34351 (0.0007) -[2024-07-05 13:13:30,962][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 47684.2). Total num frames: 261455872. Throughput: 0: 12070.4. Samples: 2853528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:13:30,963][03445] Avg episode reward: [(0, '52.731')] -[2024-07-05 13:13:31,356][03977] Updated weights for policy 0, policy_version 34361 (0.0008) -[2024-07-05 13:13:33,107][03977] Updated weights for policy 0, policy_version 34371 (0.0008) -[2024-07-05 13:13:34,778][03977] Updated weights for policy 0, policy_version 34381 (0.0008) -[2024-07-05 13:13:35,962][03445] Fps is (10 sec: 48332.1, 60 sec: 48332.8, 300 sec: 47714.2). Total num frames: 261701632. Throughput: 0: 12058.4. Samples: 2889036. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:13:35,963][03445] Avg episode reward: [(0, '53.199')] -[2024-07-05 13:13:36,477][03977] Updated weights for policy 0, policy_version 34391 (0.0008) -[2024-07-05 13:13:38,218][03977] Updated weights for policy 0, policy_version 34401 (0.0008) -[2024-07-05 13:13:39,879][03977] Updated weights for policy 0, policy_version 34411 (0.0008) -[2024-07-05 13:13:40,961][03445] Fps is (10 sec: 47514.2, 60 sec: 48059.7, 300 sec: 47677.4). Total num frames: 261931008. Throughput: 0: 12054.4. Samples: 2961284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:13:40,962][03445] Avg episode reward: [(0, '53.765')] -[2024-07-05 13:13:41,672][03977] Updated weights for policy 0, policy_version 34421 (0.0008) -[2024-07-05 13:13:43,485][03977] Updated weights for policy 0, policy_version 34431 (0.0008) -[2024-07-05 13:13:45,183][03977] Updated weights for policy 0, policy_version 34441 (0.0008) -[2024-07-05 13:13:45,961][03445] Fps is (10 sec: 47514.3, 60 sec: 48196.3, 300 sec: 47706.4). Total num frames: 262176768. Throughput: 0: 12001.2. Samples: 3031440. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:13:45,962][03445] Avg episode reward: [(0, '51.097')] -[2024-07-05 13:13:46,873][03977] Updated weights for policy 0, policy_version 34451 (0.0010) -[2024-07-05 13:13:48,573][03977] Updated weights for policy 0, policy_version 34461 (0.0008) -[2024-07-05 13:13:50,273][03977] Updated weights for policy 0, policy_version 34471 (0.0007) -[2024-07-05 13:13:50,962][03445] Fps is (10 sec: 48331.9, 60 sec: 48059.6, 300 sec: 47702.6). Total num frames: 262414336. Throughput: 0: 11992.5. Samples: 3067488. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:13:50,963][03445] Avg episode reward: [(0, '53.736')] -[2024-07-05 13:13:51,928][03977] Updated weights for policy 0, policy_version 34481 (0.0008) -[2024-07-05 13:13:53,656][03977] Updated weights for policy 0, policy_version 34491 (0.0008) -[2024-07-05 13:13:55,378][03977] Updated weights for policy 0, policy_version 34501 (0.0008) -[2024-07-05 13:13:55,961][03445] Fps is (10 sec: 47513.7, 60 sec: 48059.8, 300 sec: 47699.1). Total num frames: 262651904. Throughput: 0: 11989.9. Samples: 3139732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:13:55,962][03445] Avg episode reward: [(0, '55.853')] -[2024-07-05 13:13:55,967][03957] Saving new best policy, reward=55.853! -[2024-07-05 13:13:57,065][03977] Updated weights for policy 0, policy_version 34511 (0.0008) -[2024-07-05 13:13:58,772][03977] Updated weights for policy 0, policy_version 34521 (0.0010) -[2024-07-05 13:14:00,483][03977] Updated weights for policy 0, policy_version 34531 (0.0008) -[2024-07-05 13:14:00,961][03445] Fps is (10 sec: 47514.8, 60 sec: 47923.4, 300 sec: 47695.7). Total num frames: 262889472. Throughput: 0: 11967.4. Samples: 3211768. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:00,962][03445] Avg episode reward: [(0, '53.447')] -[2024-07-05 13:14:02,179][03977] Updated weights for policy 0, policy_version 34541 (0.0009) -[2024-07-05 13:14:03,869][03977] Updated weights for policy 0, policy_version 34551 (0.0010) -[2024-07-05 13:14:05,563][03977] Updated weights for policy 0, policy_version 34561 (0.0010) -[2024-07-05 13:14:05,962][03445] Fps is (10 sec: 48331.7, 60 sec: 48059.7, 300 sec: 47722.1). Total num frames: 263135232. Throughput: 0: 11992.4. Samples: 3248420. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:05,963][03445] Avg episode reward: [(0, '51.670')] -[2024-07-05 13:14:07,285][03977] Updated weights for policy 0, policy_version 34571 (0.0008) -[2024-07-05 13:14:08,942][03977] Updated weights for policy 0, policy_version 34581 (0.0007) -[2024-07-05 13:14:10,603][03977] Updated weights for policy 0, policy_version 34591 (0.0008) -[2024-07-05 13:14:10,961][03445] Fps is (10 sec: 49152.1, 60 sec: 48059.8, 300 sec: 47747.7). Total num frames: 263380992. Throughput: 0: 11992.6. Samples: 3320972. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:10,962][03445] Avg episode reward: [(0, '51.396')] -[2024-07-05 13:14:12,341][03977] Updated weights for policy 0, policy_version 34601 (0.0008) -[2024-07-05 13:14:14,035][03977] Updated weights for policy 0, policy_version 34611 (0.0008) -[2024-07-05 13:14:15,699][03977] Updated weights for policy 0, policy_version 34621 (0.0008) -[2024-07-05 13:14:15,962][03445] Fps is (10 sec: 48333.2, 60 sec: 48059.7, 300 sec: 47743.5). Total num frames: 263618560. Throughput: 0: 11999.9. Samples: 3393524. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:15,963][03445] Avg episode reward: [(0, '53.020')] -[2024-07-05 13:14:17,445][03977] Updated weights for policy 0, policy_version 34631 (0.0007) -[2024-07-05 13:14:19,126][03977] Updated weights for policy 0, policy_version 34641 (0.0008) -[2024-07-05 13:14:20,822][03977] Updated weights for policy 0, policy_version 34651 (0.0008) -[2024-07-05 13:14:20,961][03445] Fps is (10 sec: 47513.5, 60 sec: 48059.9, 300 sec: 47739.6). Total num frames: 263856128. Throughput: 0: 12011.6. Samples: 3429556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:20,962][03445] Avg episode reward: [(0, '55.206')] -[2024-07-05 13:14:22,499][03977] Updated weights for policy 0, policy_version 34661 (0.0008) -[2024-07-05 13:14:24,209][03977] Updated weights for policy 0, policy_version 34671 (0.0008) -[2024-07-05 13:14:25,916][03977] Updated weights for policy 0, policy_version 34681 (0.0008) -[2024-07-05 13:14:25,962][03445] Fps is (10 sec: 48332.8, 60 sec: 48059.6, 300 sec: 47763.5). Total num frames: 264101888. Throughput: 0: 12014.5. Samples: 3501940. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:25,963][03445] Avg episode reward: [(0, '50.561')] -[2024-07-05 13:14:27,620][03977] Updated weights for policy 0, policy_version 34691 (0.0008) -[2024-07-05 13:14:29,295][03977] Updated weights for policy 0, policy_version 34701 (0.0011) -[2024-07-05 13:14:30,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48059.8, 300 sec: 48124.5). Total num frames: 264339456. Throughput: 0: 12067.4. Samples: 3574472. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:30,963][03445] Avg episode reward: [(0, '54.342')] -[2024-07-05 13:14:31,017][03977] Updated weights for policy 0, policy_version 34711 (0.0010) -[2024-07-05 13:14:32,703][03977] Updated weights for policy 0, policy_version 34721 (0.0007) -[2024-07-05 13:14:34,371][03977] Updated weights for policy 0, policy_version 34731 (0.0007) -[2024-07-05 13:14:35,962][03445] Fps is (10 sec: 48332.9, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 264585216. Throughput: 0: 12071.0. Samples: 3610680. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:35,963][03445] Avg episode reward: [(0, '52.131')] -[2024-07-05 13:14:36,044][03977] Updated weights for policy 0, policy_version 34741 (0.0008) -[2024-07-05 13:14:37,759][03977] Updated weights for policy 0, policy_version 34751 (0.0008) -[2024-07-05 13:14:39,446][03977] Updated weights for policy 0, policy_version 34761 (0.0007) -[2024-07-05 13:14:40,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 264822784. Throughput: 0: 12075.1. Samples: 3683112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:40,963][03445] Avg episode reward: [(0, '52.446')] -[2024-07-05 13:14:41,197][03977] Updated weights for policy 0, policy_version 34771 (0.0008) -[2024-07-05 13:14:42,886][03977] Updated weights for policy 0, policy_version 34781 (0.0008) -[2024-07-05 13:14:44,581][03977] Updated weights for policy 0, policy_version 34791 (0.0008) -[2024-07-05 13:14:45,961][03445] Fps is (10 sec: 48333.4, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 265068544. Throughput: 0: 12080.2. Samples: 3755376. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:45,963][03445] Avg episode reward: [(0, '51.142')] -[2024-07-05 13:14:46,245][03977] Updated weights for policy 0, policy_version 34801 (0.0010) -[2024-07-05 13:14:47,928][03977] Updated weights for policy 0, policy_version 34811 (0.0007) -[2024-07-05 13:14:49,627][03977] Updated weights for policy 0, policy_version 34821 (0.0008) -[2024-07-05 13:14:50,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 265306112. Throughput: 0: 12072.9. Samples: 3791696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:50,962][03445] Avg episode reward: [(0, '53.556')] -[2024-07-05 13:14:51,341][03977] Updated weights for policy 0, policy_version 34831 (0.0008) -[2024-07-05 13:14:53,057][03977] Updated weights for policy 0, policy_version 34841 (0.0008) -[2024-07-05 13:14:54,769][03977] Updated weights for policy 0, policy_version 34851 (0.0009) -[2024-07-05 13:14:55,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 265551872. Throughput: 0: 12073.5. Samples: 3864280. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:14:55,963][03445] Avg episode reward: [(0, '51.424')] -[2024-07-05 13:14:56,459][03977] Updated weights for policy 0, policy_version 34861 (0.0012) -[2024-07-05 13:14:58,118][03977] Updated weights for policy 0, policy_version 34871 (0.0011) -[2024-07-05 13:14:59,859][03977] Updated weights for policy 0, policy_version 34881 (0.0007) -[2024-07-05 13:15:00,961][03445] Fps is (10 sec: 48332.5, 60 sec: 48332.7, 300 sec: 48291.2). Total num frames: 265789440. Throughput: 0: 12066.1. Samples: 3936496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:15:00,963][03445] Avg episode reward: [(0, '52.344')] -[2024-07-05 13:15:01,533][03977] Updated weights for policy 0, policy_version 34891 (0.0010) -[2024-07-05 13:15:03,227][03977] Updated weights for policy 0, policy_version 34901 (0.0014) -[2024-07-05 13:15:04,904][03977] Updated weights for policy 0, policy_version 34911 (0.0008) -[2024-07-05 13:15:05,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 48291.1). Total num frames: 266035200. Throughput: 0: 12075.0. Samples: 3972932. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:15:05,962][03445] Avg episode reward: [(0, '52.949')] -[2024-07-05 13:15:06,617][03977] Updated weights for policy 0, policy_version 34921 (0.0008) -[2024-07-05 13:15:08,294][03977] Updated weights for policy 0, policy_version 34931 (0.0008) -[2024-07-05 13:15:09,983][03977] Updated weights for policy 0, policy_version 34941 (0.0007) -[2024-07-05 13:15:10,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48291.2). Total num frames: 266272768. Throughput: 0: 12076.6. Samples: 4045384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:15:10,963][03445] Avg episode reward: [(0, '51.952')] -[2024-07-05 13:15:11,663][03977] Updated weights for policy 0, policy_version 34951 (0.0008) -[2024-07-05 13:15:13,357][03977] Updated weights for policy 0, policy_version 34961 (0.0008) -[2024-07-05 13:15:15,035][03977] Updated weights for policy 0, policy_version 34971 (0.0008) -[2024-07-05 13:15:15,961][03445] Fps is (10 sec: 48333.4, 60 sec: 48333.0, 300 sec: 48291.2). Total num frames: 266518528. Throughput: 0: 12068.9. Samples: 4117572. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:15:15,962][03445] Avg episode reward: [(0, '51.598')] -[2024-07-05 13:15:15,967][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000034976_266518528.pth... -[2024-07-05 13:15:16,039][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000033562_254935040.pth -[2024-07-05 13:15:16,783][03977] Updated weights for policy 0, policy_version 34981 (0.0010) -[2024-07-05 13:15:18,505][03977] Updated weights for policy 0, policy_version 34991 (0.0011) -[2024-07-05 13:15:20,185][03977] Updated weights for policy 0, policy_version 35001 (0.0008) -[2024-07-05 13:15:20,962][03445] Fps is (10 sec: 48332.2, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 266756096. Throughput: 0: 12067.6. Samples: 4153724. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:15:20,963][03445] Avg episode reward: [(0, '51.087')] -[2024-07-05 13:15:21,861][03977] Updated weights for policy 0, policy_version 35011 (0.0007) -[2024-07-05 13:15:23,596][03977] Updated weights for policy 0, policy_version 35021 (0.0007) -[2024-07-05 13:15:25,280][03977] Updated weights for policy 0, policy_version 35031 (0.0008) -[2024-07-05 13:15:25,962][03445] Fps is (10 sec: 48332.1, 60 sec: 48332.9, 300 sec: 48291.1). Total num frames: 267001856. Throughput: 0: 12072.0. Samples: 4226352. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:15:25,962][03445] Avg episode reward: [(0, '54.035')] -[2024-07-05 13:15:26,950][03977] Updated weights for policy 0, policy_version 35041 (0.0007) -[2024-07-05 13:15:28,646][03977] Updated weights for policy 0, policy_version 35051 (0.0008) -[2024-07-05 13:15:30,411][03977] Updated weights for policy 0, policy_version 35061 (0.0008) -[2024-07-05 13:15:30,962][03445] Fps is (10 sec: 48332.2, 60 sec: 48332.6, 300 sec: 48291.1). Total num frames: 267239424. Throughput: 0: 12070.8. Samples: 4298564. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:15:30,963][03445] Avg episode reward: [(0, '54.587')] -[2024-07-05 13:15:32,085][03977] Updated weights for policy 0, policy_version 35071 (0.0009) -[2024-07-05 13:15:33,808][03977] Updated weights for policy 0, policy_version 35081 (0.0008) -[2024-07-05 13:15:35,519][03977] Updated weights for policy 0, policy_version 35091 (0.0010) -[2024-07-05 13:15:35,961][03445] Fps is (10 sec: 48333.5, 60 sec: 48333.0, 300 sec: 48291.2). Total num frames: 267485184. Throughput: 0: 12065.7. Samples: 4334652. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:15:35,962][03445] Avg episode reward: [(0, '50.351')] -[2024-07-05 13:15:37,181][03977] Updated weights for policy 0, policy_version 35101 (0.0008) -[2024-07-05 13:15:38,876][03977] Updated weights for policy 0, policy_version 35111 (0.0008) -[2024-07-05 13:15:40,559][03977] Updated weights for policy 0, policy_version 35121 (0.0008) -[2024-07-05 13:15:40,961][03445] Fps is (10 sec: 48334.0, 60 sec: 48332.8, 300 sec: 48291.2). Total num frames: 267722752. Throughput: 0: 12062.9. Samples: 4407108. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:15:40,963][03445] Avg episode reward: [(0, '50.565')] -[2024-07-05 13:15:42,296][03977] Updated weights for policy 0, policy_version 35131 (0.0008) -[2024-07-05 13:15:44,002][03977] Updated weights for policy 0, policy_version 35141 (0.0011) -[2024-07-05 13:15:45,678][03977] Updated weights for policy 0, policy_version 35151 (0.0010) -[2024-07-05 13:15:45,961][03445] Fps is (10 sec: 47513.0, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 267960320. Throughput: 0: 12056.2. Samples: 4479024. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:15:45,962][03445] Avg episode reward: [(0, '54.234')] -[2024-07-05 13:15:47,396][03977] Updated weights for policy 0, policy_version 35161 (0.0008) -[2024-07-05 13:15:49,086][03977] Updated weights for policy 0, policy_version 35171 (0.0008) -[2024-07-05 13:15:50,771][03977] Updated weights for policy 0, policy_version 35181 (0.0011) -[2024-07-05 13:15:50,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 268206080. Throughput: 0: 12058.8. Samples: 4515576. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:15:50,963][03445] Avg episode reward: [(0, '51.038')] -[2024-07-05 13:15:52,472][03977] Updated weights for policy 0, policy_version 35191 (0.0008) -[2024-07-05 13:15:54,185][03977] Updated weights for policy 0, policy_version 35201 (0.0007) -[2024-07-05 13:15:55,859][03977] Updated weights for policy 0, policy_version 35211 (0.0008) -[2024-07-05 13:15:55,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 268443648. Throughput: 0: 12053.3. Samples: 4587784. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:15:55,962][03445] Avg episode reward: [(0, '50.087')] -[2024-07-05 13:15:57,534][03977] Updated weights for policy 0, policy_version 35221 (0.0008) -[2024-07-05 13:15:59,201][03977] Updated weights for policy 0, policy_version 35231 (0.0010) -[2024-07-05 13:16:00,940][03977] Updated weights for policy 0, policy_version 35241 (0.0015) -[2024-07-05 13:16:00,962][03445] Fps is (10 sec: 48331.8, 60 sec: 48332.7, 300 sec: 48263.4). Total num frames: 268689408. Throughput: 0: 12068.9. Samples: 4660676. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:16:00,963][03445] Avg episode reward: [(0, '52.846')] -[2024-07-05 13:16:02,626][03977] Updated weights for policy 0, policy_version 35251 (0.0008) -[2024-07-05 13:16:04,333][03977] Updated weights for policy 0, policy_version 35261 (0.0008) -[2024-07-05 13:16:05,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 268926976. Throughput: 0: 12069.1. Samples: 4696832. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:16:05,962][03445] Avg episode reward: [(0, '48.629')] -[2024-07-05 13:16:06,036][03977] Updated weights for policy 0, policy_version 35271 (0.0008) -[2024-07-05 13:16:07,742][03977] Updated weights for policy 0, policy_version 35281 (0.0008) -[2024-07-05 13:16:09,406][03977] Updated weights for policy 0, policy_version 35291 (0.0008) -[2024-07-05 13:16:10,961][03445] Fps is (10 sec: 48333.9, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 269172736. Throughput: 0: 12064.1. Samples: 4769236. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:16:10,962][03445] Avg episode reward: [(0, '52.732')] -[2024-07-05 13:16:11,119][03977] Updated weights for policy 0, policy_version 35301 (0.0008) -[2024-07-05 13:16:12,796][03977] Updated weights for policy 0, policy_version 35311 (0.0007) -[2024-07-05 13:16:14,474][03977] Updated weights for policy 0, policy_version 35321 (0.0011) -[2024-07-05 13:16:15,962][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 269410304. Throughput: 0: 12067.1. Samples: 4841580. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:16:15,963][03445] Avg episode reward: [(0, '52.002')] -[2024-07-05 13:16:16,189][03977] Updated weights for policy 0, policy_version 35331 (0.0007) -[2024-07-05 13:16:17,937][03977] Updated weights for policy 0, policy_version 35341 (0.0008) -[2024-07-05 13:16:19,616][03977] Updated weights for policy 0, policy_version 35351 (0.0010) -[2024-07-05 13:16:20,961][03445] Fps is (10 sec: 47513.7, 60 sec: 48196.4, 300 sec: 48235.6). Total num frames: 269647872. Throughput: 0: 12060.5. Samples: 4877376. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:16:20,962][03445] Avg episode reward: [(0, '53.095')] -[2024-07-05 13:16:21,331][03977] Updated weights for policy 0, policy_version 35361 (0.0007) -[2024-07-05 13:16:23,001][03977] Updated weights for policy 0, policy_version 35371 (0.0008) -[2024-07-05 13:16:24,689][03977] Updated weights for policy 0, policy_version 35381 (0.0008) -[2024-07-05 13:16:25,962][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 269893632. Throughput: 0: 12064.2. Samples: 4950000. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:16:25,963][03445] Avg episode reward: [(0, '53.274')] -[2024-07-05 13:16:26,396][03977] Updated weights for policy 0, policy_version 35391 (0.0008) -[2024-07-05 13:16:28,094][03977] Updated weights for policy 0, policy_version 35401 (0.0008) -[2024-07-05 13:16:29,799][03977] Updated weights for policy 0, policy_version 35411 (0.0008) -[2024-07-05 13:16:30,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.5, 300 sec: 48235.6). Total num frames: 270131200. Throughput: 0: 12074.0. Samples: 5022352. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:16:30,962][03445] Avg episode reward: [(0, '51.247')] -[2024-07-05 13:16:31,484][03977] Updated weights for policy 0, policy_version 35421 (0.0013) -[2024-07-05 13:16:33,198][03977] Updated weights for policy 0, policy_version 35431 (0.0008) -[2024-07-05 13:16:34,904][03977] Updated weights for policy 0, policy_version 35441 (0.0008) -[2024-07-05 13:16:35,961][03445] Fps is (10 sec: 48333.4, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 270376960. Throughput: 0: 12073.3. Samples: 5058872. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:16:35,962][03445] Avg episode reward: [(0, '54.949')] -[2024-07-05 13:16:36,529][03977] Updated weights for policy 0, policy_version 35451 (0.0007) -[2024-07-05 13:16:38,221][03977] Updated weights for policy 0, policy_version 35461 (0.0007) -[2024-07-05 13:16:39,939][03977] Updated weights for policy 0, policy_version 35471 (0.0008) -[2024-07-05 13:16:40,961][03445] Fps is (10 sec: 49151.9, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 270622720. Throughput: 0: 12078.5. Samples: 5131316. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:16:40,963][03445] Avg episode reward: [(0, '53.864')] -[2024-07-05 13:16:41,621][03977] Updated weights for policy 0, policy_version 35481 (0.0014) -[2024-07-05 13:16:43,315][03977] Updated weights for policy 0, policy_version 35491 (0.0008) -[2024-07-05 13:16:45,028][03977] Updated weights for policy 0, policy_version 35501 (0.0011) -[2024-07-05 13:16:45,962][03445] Fps is (10 sec: 48332.1, 60 sec: 48332.7, 300 sec: 48263.4). Total num frames: 270860288. Throughput: 0: 12069.9. Samples: 5203820. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:16:45,962][03445] Avg episode reward: [(0, '53.263')] -[2024-07-05 13:16:46,746][03977] Updated weights for policy 0, policy_version 35511 (0.0008) -[2024-07-05 13:16:48,439][03977] Updated weights for policy 0, policy_version 35521 (0.0008) -[2024-07-05 13:16:50,124][03977] Updated weights for policy 0, policy_version 35531 (0.0008) -[2024-07-05 13:16:50,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 271106048. Throughput: 0: 12079.1. Samples: 5240392. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:16:50,964][03445] Avg episode reward: [(0, '51.207')] -[2024-07-05 13:16:51,801][03977] Updated weights for policy 0, policy_version 35541 (0.0008) -[2024-07-05 13:16:53,500][03977] Updated weights for policy 0, policy_version 35551 (0.0008) -[2024-07-05 13:16:55,178][03977] Updated weights for policy 0, policy_version 35561 (0.0008) -[2024-07-05 13:16:55,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 271343616. Throughput: 0: 12079.5. Samples: 5312816. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) -[2024-07-05 13:16:55,962][03445] Avg episode reward: [(0, '52.538')] -[2024-07-05 13:16:56,870][03977] Updated weights for policy 0, policy_version 35571 (0.0008) -[2024-07-05 13:16:58,580][03977] Updated weights for policy 0, policy_version 35581 (0.0008) -[2024-07-05 13:17:00,263][03977] Updated weights for policy 0, policy_version 35591 (0.0012) -[2024-07-05 13:17:00,962][03445] Fps is (10 sec: 48331.8, 60 sec: 48332.8, 300 sec: 48263.3). Total num frames: 271589376. Throughput: 0: 12079.0. Samples: 5385136. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) -[2024-07-05 13:17:00,963][03445] Avg episode reward: [(0, '52.365')] -[2024-07-05 13:17:01,955][03977] Updated weights for policy 0, policy_version 35601 (0.0008) -[2024-07-05 13:17:03,663][03977] Updated weights for policy 0, policy_version 35611 (0.0008) -[2024-07-05 13:17:05,388][03977] Updated weights for policy 0, policy_version 35621 (0.0008) -[2024-07-05 13:17:05,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 271826944. Throughput: 0: 12083.9. Samples: 5421152. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) -[2024-07-05 13:17:05,963][03445] Avg episode reward: [(0, '53.099')] -[2024-07-05 13:17:07,052][03977] Updated weights for policy 0, policy_version 35631 (0.0008) -[2024-07-05 13:17:08,752][03977] Updated weights for policy 0, policy_version 35641 (0.0008) -[2024-07-05 13:17:10,442][03977] Updated weights for policy 0, policy_version 35651 (0.0008) -[2024-07-05 13:17:10,961][03445] Fps is (10 sec: 48334.3, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 272072704. Throughput: 0: 12088.0. Samples: 5493960. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) -[2024-07-05 13:17:10,962][03445] Avg episode reward: [(0, '50.325')] -[2024-07-05 13:17:12,145][03977] Updated weights for policy 0, policy_version 35661 (0.0007) -[2024-07-05 13:17:13,835][03977] Updated weights for policy 0, policy_version 35671 (0.0008) -[2024-07-05 13:17:15,567][03977] Updated weights for policy 0, policy_version 35681 (0.0008) -[2024-07-05 13:17:15,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 272310272. Throughput: 0: 12091.5. Samples: 5566472. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) -[2024-07-05 13:17:15,962][03445] Avg episode reward: [(0, '51.508')] -[2024-07-05 13:17:15,966][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000035683_272310272.pth... -[2024-07-05 13:17:16,035][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000034270_260734976.pth -[2024-07-05 13:17:17,277][03977] Updated weights for policy 0, policy_version 35691 (0.0008) -[2024-07-05 13:17:18,973][03977] Updated weights for policy 0, policy_version 35701 (0.0008) -[2024-07-05 13:17:20,661][03977] Updated weights for policy 0, policy_version 35711 (0.0008) -[2024-07-05 13:17:20,961][03445] Fps is (10 sec: 47513.0, 60 sec: 48332.7, 300 sec: 48235.6). Total num frames: 272547840. Throughput: 0: 12073.6. Samples: 5602184. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:20,962][03445] Avg episode reward: [(0, '50.019')] -[2024-07-05 13:17:22,372][03977] Updated weights for policy 0, policy_version 35721 (0.0008) -[2024-07-05 13:17:24,050][03977] Updated weights for policy 0, policy_version 35731 (0.0008) -[2024-07-05 13:17:25,767][03977] Updated weights for policy 0, policy_version 35741 (0.0008) -[2024-07-05 13:17:25,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 272793600. Throughput: 0: 12081.2. Samples: 5674972. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:25,963][03445] Avg episode reward: [(0, '50.693')] -[2024-07-05 13:17:27,426][03977] Updated weights for policy 0, policy_version 35751 (0.0009) -[2024-07-05 13:17:29,150][03977] Updated weights for policy 0, policy_version 35761 (0.0008) -[2024-07-05 13:17:30,824][03977] Updated weights for policy 0, policy_version 35771 (0.0008) -[2024-07-05 13:17:30,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 273031168. Throughput: 0: 12074.2. Samples: 5747156. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:30,962][03445] Avg episode reward: [(0, '52.494')] -[2024-07-05 13:17:32,561][03977] Updated weights for policy 0, policy_version 35781 (0.0008) -[2024-07-05 13:17:34,243][03977] Updated weights for policy 0, policy_version 35791 (0.0008) -[2024-07-05 13:17:35,947][03977] Updated weights for policy 0, policy_version 35801 (0.0009) -[2024-07-05 13:17:35,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 273276928. Throughput: 0: 12063.8. Samples: 5783264. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:35,962][03445] Avg episode reward: [(0, '51.613')] -[2024-07-05 13:17:37,648][03977] Updated weights for policy 0, policy_version 35811 (0.0008) -[2024-07-05 13:17:39,342][03977] Updated weights for policy 0, policy_version 35821 (0.0008) -[2024-07-05 13:17:40,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 273514496. Throughput: 0: 12062.9. Samples: 5855644. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:40,963][03445] Avg episode reward: [(0, '53.297')] -[2024-07-05 13:17:41,043][03977] Updated weights for policy 0, policy_version 35831 (0.0008) -[2024-07-05 13:17:42,763][03977] Updated weights for policy 0, policy_version 35841 (0.0008) -[2024-07-05 13:17:44,411][03977] Updated weights for policy 0, policy_version 35851 (0.0008) -[2024-07-05 13:17:45,961][03445] Fps is (10 sec: 47513.7, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 273752064. Throughput: 0: 12065.6. Samples: 5928084. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:45,962][03445] Avg episode reward: [(0, '53.844')] -[2024-07-05 13:17:46,130][03977] Updated weights for policy 0, policy_version 35861 (0.0009) -[2024-07-05 13:17:47,799][03977] Updated weights for policy 0, policy_version 35871 (0.0008) -[2024-07-05 13:17:49,515][03977] Updated weights for policy 0, policy_version 35881 (0.0008) -[2024-07-05 13:17:50,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 273997824. Throughput: 0: 12064.7. Samples: 5964064. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:50,963][03445] Avg episode reward: [(0, '52.247')] -[2024-07-05 13:17:51,260][03977] Updated weights for policy 0, policy_version 35891 (0.0008) -[2024-07-05 13:17:52,923][03977] Updated weights for policy 0, policy_version 35901 (0.0008) -[2024-07-05 13:17:54,621][03977] Updated weights for policy 0, policy_version 35911 (0.0008) -[2024-07-05 13:17:55,962][03445] Fps is (10 sec: 49151.1, 60 sec: 48332.7, 300 sec: 48235.6). Total num frames: 274243584. Throughput: 0: 12046.3. Samples: 6036048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:17:55,963][03445] Avg episode reward: [(0, '53.129')] -[2024-07-05 13:17:56,278][03977] Updated weights for policy 0, policy_version 35921 (0.0009) -[2024-07-05 13:17:58,040][03977] Updated weights for policy 0, policy_version 35931 (0.0010) -[2024-07-05 13:17:59,707][03977] Updated weights for policy 0, policy_version 35941 (0.0008) -[2024-07-05 13:18:00,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.5, 300 sec: 48235.6). Total num frames: 274481152. Throughput: 0: 12045.3. Samples: 6108512. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:18:00,962][03445] Avg episode reward: [(0, '50.898')] -[2024-07-05 13:18:01,417][03977] Updated weights for policy 0, policy_version 35951 (0.0010) -[2024-07-05 13:18:03,137][03977] Updated weights for policy 0, policy_version 35961 (0.0010) -[2024-07-05 13:18:04,826][03977] Updated weights for policy 0, policy_version 35971 (0.0015) -[2024-07-05 13:18:05,962][03445] Fps is (10 sec: 47512.8, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 274718720. Throughput: 0: 12053.1. Samples: 6144576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:18:05,963][03445] Avg episode reward: [(0, '53.098')] -[2024-07-05 13:18:06,525][03977] Updated weights for policy 0, policy_version 35981 (0.0008) -[2024-07-05 13:18:08,271][03977] Updated weights for policy 0, policy_version 35991 (0.0008) -[2024-07-05 13:18:09,966][03977] Updated weights for policy 0, policy_version 36001 (0.0008) -[2024-07-05 13:18:10,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 274964480. Throughput: 0: 12044.1. Samples: 6216956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:18:10,962][03445] Avg episode reward: [(0, '50.921')] -[2024-07-05 13:18:11,614][03977] Updated weights for policy 0, policy_version 36011 (0.0008) -[2024-07-05 13:18:13,328][03977] Updated weights for policy 0, policy_version 36021 (0.0008) -[2024-07-05 13:18:15,005][03977] Updated weights for policy 0, policy_version 36031 (0.0008) -[2024-07-05 13:18:15,961][03445] Fps is (10 sec: 48334.1, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 275202048. Throughput: 0: 12051.7. Samples: 6289484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:18:15,963][03445] Avg episode reward: [(0, '52.673')] -[2024-07-05 13:18:16,721][03977] Updated weights for policy 0, policy_version 36041 (0.0013) -[2024-07-05 13:18:18,423][03977] Updated weights for policy 0, policy_version 36051 (0.0008) -[2024-07-05 13:18:20,109][03977] Updated weights for policy 0, policy_version 36061 (0.0013) -[2024-07-05 13:18:20,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 275447808. Throughput: 0: 12056.2. Samples: 6325792. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:18:20,963][03445] Avg episode reward: [(0, '52.244')] -[2024-07-05 13:18:21,804][03977] Updated weights for policy 0, policy_version 36071 (0.0007) -[2024-07-05 13:18:23,544][03977] Updated weights for policy 0, policy_version 36081 (0.0008) -[2024-07-05 13:18:25,269][03977] Updated weights for policy 0, policy_version 36091 (0.0010) -[2024-07-05 13:18:25,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 275685376. Throughput: 0: 12052.0. Samples: 6397984. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:18:25,962][03445] Avg episode reward: [(0, '52.951')] -[2024-07-05 13:18:26,905][03977] Updated weights for policy 0, policy_version 36101 (0.0008) -[2024-07-05 13:18:28,626][03977] Updated weights for policy 0, policy_version 36111 (0.0008) -[2024-07-05 13:18:30,333][03977] Updated weights for policy 0, policy_version 36121 (0.0008) -[2024-07-05 13:18:30,961][03445] Fps is (10 sec: 47514.4, 60 sec: 48196.4, 300 sec: 48207.9). Total num frames: 275922944. Throughput: 0: 12041.0. Samples: 6469928. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:18:30,962][03445] Avg episode reward: [(0, '53.700')] -[2024-07-05 13:18:32,022][03977] Updated weights for policy 0, policy_version 36131 (0.0008) -[2024-07-05 13:18:33,734][03977] Updated weights for policy 0, policy_version 36141 (0.0008) -[2024-07-05 13:18:35,434][03977] Updated weights for policy 0, policy_version 36151 (0.0007) -[2024-07-05 13:18:35,961][03445] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 276168704. Throughput: 0: 12050.0. Samples: 6506312. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:18:35,962][03445] Avg episode reward: [(0, '52.618')] -[2024-07-05 13:18:37,146][03977] Updated weights for policy 0, policy_version 36161 (0.0009) -[2024-07-05 13:18:38,835][03977] Updated weights for policy 0, policy_version 36171 (0.0007) -[2024-07-05 13:18:40,534][03977] Updated weights for policy 0, policy_version 36181 (0.0008) -[2024-07-05 13:18:40,962][03445] Fps is (10 sec: 48331.2, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 276406272. Throughput: 0: 12052.2. Samples: 6578396. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:18:40,962][03445] Avg episode reward: [(0, '53.559')] -[2024-07-05 13:18:42,210][03977] Updated weights for policy 0, policy_version 36191 (0.0007) -[2024-07-05 13:18:43,923][03977] Updated weights for policy 0, policy_version 36201 (0.0010) -[2024-07-05 13:18:45,610][03977] Updated weights for policy 0, policy_version 36211 (0.0008) -[2024-07-05 13:18:45,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 276652032. Throughput: 0: 12054.7. Samples: 6650972. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:18:45,962][03445] Avg episode reward: [(0, '50.510')] -[2024-07-05 13:18:47,308][03977] Updated weights for policy 0, policy_version 36221 (0.0008) -[2024-07-05 13:18:49,008][03977] Updated weights for policy 0, policy_version 36231 (0.0008) -[2024-07-05 13:18:50,690][03977] Updated weights for policy 0, policy_version 36241 (0.0010) -[2024-07-05 13:18:50,961][03445] Fps is (10 sec: 48333.5, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 276889600. Throughput: 0: 12052.3. Samples: 6686924. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:18:50,962][03445] Avg episode reward: [(0, '50.798')] -[2024-07-05 13:18:52,469][03977] Updated weights for policy 0, policy_version 36251 (0.0008) -[2024-07-05 13:18:54,157][03977] Updated weights for policy 0, policy_version 36261 (0.0008) -[2024-07-05 13:18:55,831][03977] Updated weights for policy 0, policy_version 36271 (0.0008) -[2024-07-05 13:18:55,961][03445] Fps is (10 sec: 47513.8, 60 sec: 48059.9, 300 sec: 48263.4). Total num frames: 277127168. Throughput: 0: 12047.5. Samples: 6759092. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:18:55,962][03445] Avg episode reward: [(0, '54.756')] -[2024-07-05 13:18:57,518][03977] Updated weights for policy 0, policy_version 36281 (0.0008) -[2024-07-05 13:18:59,200][03977] Updated weights for policy 0, policy_version 36291 (0.0008) -[2024-07-05 13:19:00,907][03977] Updated weights for policy 0, policy_version 36301 (0.0008) -[2024-07-05 13:19:00,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 277372928. Throughput: 0: 12052.3. Samples: 6831836. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:00,962][03445] Avg episode reward: [(0, '50.064')] -[2024-07-05 13:19:02,618][03977] Updated weights for policy 0, policy_version 36311 (0.0008) -[2024-07-05 13:19:04,327][03977] Updated weights for policy 0, policy_version 36321 (0.0008) -[2024-07-05 13:19:05,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.6, 300 sec: 48235.6). Total num frames: 277610496. Throughput: 0: 12032.7. Samples: 6867264. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:05,962][03445] Avg episode reward: [(0, '50.146')] -[2024-07-05 13:19:06,031][03977] Updated weights for policy 0, policy_version 36331 (0.0008) -[2024-07-05 13:19:07,737][03977] Updated weights for policy 0, policy_version 36341 (0.0009) -[2024-07-05 13:19:09,421][03977] Updated weights for policy 0, policy_version 36351 (0.0008) -[2024-07-05 13:19:10,962][03445] Fps is (10 sec: 48331.9, 60 sec: 48196.1, 300 sec: 48263.4). Total num frames: 277856256. Throughput: 0: 12046.5. Samples: 6940080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:10,963][03445] Avg episode reward: [(0, '53.163')] -[2024-07-05 13:19:11,129][03977] Updated weights for policy 0, policy_version 36361 (0.0010) -[2024-07-05 13:19:12,823][03977] Updated weights for policy 0, policy_version 36371 (0.0008) -[2024-07-05 13:19:14,548][03977] Updated weights for policy 0, policy_version 36381 (0.0008) -[2024-07-05 13:19:15,961][03445] Fps is (10 sec: 48332.2, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 278093824. Throughput: 0: 12048.2. Samples: 7012100. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:15,962][03445] Avg episode reward: [(0, '51.208')] -[2024-07-05 13:19:15,966][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000036389_278093824.pth... -[2024-07-05 13:19:16,039][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000034976_266518528.pth -[2024-07-05 13:19:16,268][03977] Updated weights for policy 0, policy_version 36391 (0.0011) -[2024-07-05 13:19:17,983][03977] Updated weights for policy 0, policy_version 36401 (0.0008) -[2024-07-05 13:19:19,645][03977] Updated weights for policy 0, policy_version 36411 (0.0010) -[2024-07-05 13:19:20,961][03445] Fps is (10 sec: 47514.5, 60 sec: 48059.7, 300 sec: 48235.6). Total num frames: 278331392. Throughput: 0: 12035.1. Samples: 7047892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:20,962][03445] Avg episode reward: [(0, '49.572')] -[2024-07-05 13:19:21,349][03977] Updated weights for policy 0, policy_version 36421 (0.0008) -[2024-07-05 13:19:23,056][03977] Updated weights for policy 0, policy_version 36431 (0.0008) -[2024-07-05 13:19:24,752][03977] Updated weights for policy 0, policy_version 36441 (0.0008) -[2024-07-05 13:19:25,961][03445] Fps is (10 sec: 47514.2, 60 sec: 48059.8, 300 sec: 48235.6). Total num frames: 278568960. Throughput: 0: 12043.9. Samples: 7120368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:25,962][03445] Avg episode reward: [(0, '51.295')] -[2024-07-05 13:19:26,446][03977] Updated weights for policy 0, policy_version 36451 (0.0009) -[2024-07-05 13:19:28,183][03977] Updated weights for policy 0, policy_version 36461 (0.0008) -[2024-07-05 13:19:29,893][03977] Updated weights for policy 0, policy_version 36471 (0.0008) -[2024-07-05 13:19:30,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 278814720. Throughput: 0: 12035.8. Samples: 7192584. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:19:30,962][03445] Avg episode reward: [(0, '52.262')] -[2024-07-05 13:19:31,535][03977] Updated weights for policy 0, policy_version 36481 (0.0008) -[2024-07-05 13:19:33,247][03977] Updated weights for policy 0, policy_version 36491 (0.0011) -[2024-07-05 13:19:34,917][03977] Updated weights for policy 0, policy_version 36501 (0.0008) -[2024-07-05 13:19:35,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 48235.6). Total num frames: 279052288. Throughput: 0: 12040.4. Samples: 7228740. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:19:35,962][03445] Avg episode reward: [(0, '54.660')] -[2024-07-05 13:19:36,630][03977] Updated weights for policy 0, policy_version 36511 (0.0008) -[2024-07-05 13:19:38,365][03977] Updated weights for policy 0, policy_version 36521 (0.0008) -[2024-07-05 13:19:40,106][03977] Updated weights for policy 0, policy_version 36531 (0.0010) -[2024-07-05 13:19:40,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.4, 300 sec: 48235.6). Total num frames: 279298048. Throughput: 0: 12043.6. Samples: 7301056. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:19:40,962][03445] Avg episode reward: [(0, '52.131')] -[2024-07-05 13:19:41,777][03977] Updated weights for policy 0, policy_version 36541 (0.0008) -[2024-07-05 13:19:43,448][03977] Updated weights for policy 0, policy_version 36551 (0.0007) -[2024-07-05 13:19:45,130][03977] Updated weights for policy 0, policy_version 36561 (0.0008) -[2024-07-05 13:19:45,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48059.8, 300 sec: 48235.6). Total num frames: 279535616. Throughput: 0: 12032.1. Samples: 7373280. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:19:45,962][03445] Avg episode reward: [(0, '50.464')] -[2024-07-05 13:19:46,838][03977] Updated weights for policy 0, policy_version 36571 (0.0008) -[2024-07-05 13:19:48,523][03977] Updated weights for policy 0, policy_version 36581 (0.0008) -[2024-07-05 13:19:50,214][03977] Updated weights for policy 0, policy_version 36591 (0.0008) -[2024-07-05 13:19:50,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 279781376. Throughput: 0: 12060.2. Samples: 7409976. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:19:50,962][03445] Avg episode reward: [(0, '51.943')] -[2024-07-05 13:19:51,931][03977] Updated weights for policy 0, policy_version 36601 (0.0007) -[2024-07-05 13:19:53,625][03977] Updated weights for policy 0, policy_version 36611 (0.0008) -[2024-07-05 13:19:55,362][03977] Updated weights for policy 0, policy_version 36621 (0.0008) -[2024-07-05 13:19:55,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 280018944. Throughput: 0: 12033.6. Samples: 7481592. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:19:55,962][03445] Avg episode reward: [(0, '54.015')] -[2024-07-05 13:19:57,043][03977] Updated weights for policy 0, policy_version 36631 (0.0007) -[2024-07-05 13:19:58,749][03977] Updated weights for policy 0, policy_version 36641 (0.0008) -[2024-07-05 13:20:00,445][03977] Updated weights for policy 0, policy_version 36651 (0.0008) -[2024-07-05 13:20:00,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 280264704. Throughput: 0: 12046.9. Samples: 7554208. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:00,962][03445] Avg episode reward: [(0, '52.670')] -[2024-07-05 13:20:02,126][03977] Updated weights for policy 0, policy_version 36661 (0.0012) -[2024-07-05 13:20:03,802][03977] Updated weights for policy 0, policy_version 36671 (0.0008) -[2024-07-05 13:20:05,510][03977] Updated weights for policy 0, policy_version 36681 (0.0010) -[2024-07-05 13:20:05,962][03445] Fps is (10 sec: 48332.2, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 280502272. Throughput: 0: 12056.9. Samples: 7590452. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:05,963][03445] Avg episode reward: [(0, '53.172')] -[2024-07-05 13:20:07,248][03977] Updated weights for policy 0, policy_version 36691 (0.0008) -[2024-07-05 13:20:08,962][03977] Updated weights for policy 0, policy_version 36701 (0.0008) -[2024-07-05 13:20:10,660][03977] Updated weights for policy 0, policy_version 36711 (0.0007) -[2024-07-05 13:20:10,961][03445] Fps is (10 sec: 47514.1, 60 sec: 48059.9, 300 sec: 48207.8). Total num frames: 280739840. Throughput: 0: 12041.7. Samples: 7662244. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:10,962][03445] Avg episode reward: [(0, '49.986')] -[2024-07-05 13:20:12,361][03977] Updated weights for policy 0, policy_version 36721 (0.0007) -[2024-07-05 13:20:14,047][03977] Updated weights for policy 0, policy_version 36731 (0.0008) -[2024-07-05 13:20:15,776][03977] Updated weights for policy 0, policy_version 36741 (0.0008) -[2024-07-05 13:20:15,962][03445] Fps is (10 sec: 48331.7, 60 sec: 48196.0, 300 sec: 48235.6). Total num frames: 280985600. Throughput: 0: 12051.9. Samples: 7734924. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:15,963][03445] Avg episode reward: [(0, '51.119')] -[2024-07-05 13:20:17,462][03977] Updated weights for policy 0, policy_version 36751 (0.0008) -[2024-07-05 13:20:19,157][03977] Updated weights for policy 0, policy_version 36761 (0.0008) -[2024-07-05 13:20:20,848][03977] Updated weights for policy 0, policy_version 36771 (0.0009) -[2024-07-05 13:20:20,962][03445] Fps is (10 sec: 48331.9, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 281223168. Throughput: 0: 12049.8. Samples: 7770984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:20,963][03445] Avg episode reward: [(0, '50.977')] -[2024-07-05 13:20:22,575][03977] Updated weights for policy 0, policy_version 36781 (0.0008) -[2024-07-05 13:20:24,278][03977] Updated weights for policy 0, policy_version 36791 (0.0007) -[2024-07-05 13:20:25,962][03445] Fps is (10 sec: 47514.5, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 281460736. Throughput: 0: 12041.3. Samples: 7842916. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:25,962][03445] Avg episode reward: [(0, '50.524')] -[2024-07-05 13:20:25,983][03977] Updated weights for policy 0, policy_version 36801 (0.0009) -[2024-07-05 13:20:27,667][03977] Updated weights for policy 0, policy_version 36811 (0.0008) -[2024-07-05 13:20:29,363][03977] Updated weights for policy 0, policy_version 36821 (0.0010) -[2024-07-05 13:20:30,961][03445] Fps is (10 sec: 48333.5, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 281706496. Throughput: 0: 12042.8. Samples: 7915204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:30,962][03445] Avg episode reward: [(0, '50.946')] -[2024-07-05 13:20:31,069][03977] Updated weights for policy 0, policy_version 36831 (0.0010) -[2024-07-05 13:20:32,754][03977] Updated weights for policy 0, policy_version 36841 (0.0008) -[2024-07-05 13:20:34,514][03977] Updated weights for policy 0, policy_version 36851 (0.0009) -[2024-07-05 13:20:35,961][03445] Fps is (10 sec: 48333.8, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 281944064. Throughput: 0: 12029.5. Samples: 7951304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:35,962][03445] Avg episode reward: [(0, '51.989')] -[2024-07-05 13:20:36,239][03977] Updated weights for policy 0, policy_version 36861 (0.0008) -[2024-07-05 13:20:37,909][03977] Updated weights for policy 0, policy_version 36871 (0.0008) -[2024-07-05 13:20:39,591][03977] Updated weights for policy 0, policy_version 36881 (0.0011) -[2024-07-05 13:20:40,962][03445] Fps is (10 sec: 48332.0, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 282189824. Throughput: 0: 12050.2. Samples: 8023852. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:20:40,963][03445] Avg episode reward: [(0, '51.462')] -[2024-07-05 13:20:41,255][03977] Updated weights for policy 0, policy_version 36891 (0.0008) -[2024-07-05 13:20:42,952][03977] Updated weights for policy 0, policy_version 36901 (0.0008) -[2024-07-05 13:20:44,617][03977] Updated weights for policy 0, policy_version 36911 (0.0008) -[2024-07-05 13:20:45,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 282427392. Throughput: 0: 12039.4. Samples: 8095980. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:20:45,962][03445] Avg episode reward: [(0, '53.517')] -[2024-07-05 13:20:46,329][03977] Updated weights for policy 0, policy_version 36921 (0.0008) -[2024-07-05 13:20:48,063][03977] Updated weights for policy 0, policy_version 36931 (0.0007) -[2024-07-05 13:20:49,807][03977] Updated weights for policy 0, policy_version 36941 (0.0008) -[2024-07-05 13:20:50,962][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 282673152. Throughput: 0: 12040.2. Samples: 8132260. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:20:50,963][03445] Avg episode reward: [(0, '54.597')] -[2024-07-05 13:20:51,524][03977] Updated weights for policy 0, policy_version 36951 (0.0008) -[2024-07-05 13:20:53,209][03977] Updated weights for policy 0, policy_version 36961 (0.0008) -[2024-07-05 13:20:54,907][03977] Updated weights for policy 0, policy_version 36971 (0.0008) -[2024-07-05 13:20:55,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 282910720. Throughput: 0: 12047.6. Samples: 8204388. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:20:55,962][03445] Avg episode reward: [(0, '51.128')] -[2024-07-05 13:20:56,610][03977] Updated weights for policy 0, policy_version 36981 (0.0008) -[2024-07-05 13:20:58,293][03977] Updated weights for policy 0, policy_version 36991 (0.0009) -[2024-07-05 13:20:59,955][03977] Updated weights for policy 0, policy_version 37001 (0.0008) -[2024-07-05 13:21:00,961][03445] Fps is (10 sec: 48333.7, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 283156480. Throughput: 0: 12043.0. Samples: 8276856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:21:00,962][03445] Avg episode reward: [(0, '52.532')] -[2024-07-05 13:21:01,610][03977] Updated weights for policy 0, policy_version 37011 (0.0010) -[2024-07-05 13:21:03,331][03977] Updated weights for policy 0, policy_version 37021 (0.0008) -[2024-07-05 13:21:05,061][03977] Updated weights for policy 0, policy_version 37031 (0.0011) -[2024-07-05 13:21:05,962][03445] Fps is (10 sec: 48331.8, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 283394048. Throughput: 0: 12048.4. Samples: 8313164. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:21:05,963][03445] Avg episode reward: [(0, '52.555')] -[2024-07-05 13:21:06,742][03977] Updated weights for policy 0, policy_version 37041 (0.0010) -[2024-07-05 13:21:08,403][03977] Updated weights for policy 0, policy_version 37051 (0.0007) -[2024-07-05 13:21:10,113][03977] Updated weights for policy 0, policy_version 37061 (0.0008) -[2024-07-05 13:21:10,962][03445] Fps is (10 sec: 47513.2, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 283631616. Throughput: 0: 12065.4. Samples: 8385860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:10,962][03445] Avg episode reward: [(0, '52.385')] -[2024-07-05 13:21:11,801][03977] Updated weights for policy 0, policy_version 37071 (0.0010) -[2024-07-05 13:21:13,499][03977] Updated weights for policy 0, policy_version 37081 (0.0008) -[2024-07-05 13:21:15,218][03977] Updated weights for policy 0, policy_version 37091 (0.0008) -[2024-07-05 13:21:15,961][03445] Fps is (10 sec: 48334.0, 60 sec: 48196.6, 300 sec: 48235.6). Total num frames: 283877376. Throughput: 0: 12062.8. Samples: 8458032. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:15,962][03445] Avg episode reward: [(0, '51.386')] -[2024-07-05 13:21:15,965][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000037095_283877376.pth... -[2024-07-05 13:21:16,032][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000035683_272310272.pth -[2024-07-05 13:21:16,951][03977] Updated weights for policy 0, policy_version 37101 (0.0010) -[2024-07-05 13:21:18,623][03977] Updated weights for policy 0, policy_version 37111 (0.0011) -[2024-07-05 13:21:20,313][03977] Updated weights for policy 0, policy_version 37121 (0.0008) -[2024-07-05 13:21:20,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 284114944. Throughput: 0: 12058.9. Samples: 8493956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:20,962][03445] Avg episode reward: [(0, '51.886')] -[2024-07-05 13:21:22,004][03977] Updated weights for policy 0, policy_version 37131 (0.0008) -[2024-07-05 13:21:23,739][03977] Updated weights for policy 0, policy_version 37141 (0.0007) -[2024-07-05 13:21:25,423][03977] Updated weights for policy 0, policy_version 37151 (0.0008) -[2024-07-05 13:21:25,961][03445] Fps is (10 sec: 48332.5, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 284360704. Throughput: 0: 12060.4. Samples: 8566568. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:25,963][03445] Avg episode reward: [(0, '53.219')] -[2024-07-05 13:21:27,106][03977] Updated weights for policy 0, policy_version 37161 (0.0008) -[2024-07-05 13:21:28,822][03977] Updated weights for policy 0, policy_version 37171 (0.0008) -[2024-07-05 13:21:30,521][03977] Updated weights for policy 0, policy_version 37181 (0.0008) -[2024-07-05 13:21:30,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 284598272. Throughput: 0: 12064.2. Samples: 8638868. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:30,962][03445] Avg episode reward: [(0, '51.269')] -[2024-07-05 13:21:32,212][03977] Updated weights for policy 0, policy_version 37191 (0.0008) -[2024-07-05 13:21:33,885][03977] Updated weights for policy 0, policy_version 37201 (0.0008) -[2024-07-05 13:21:35,585][03977] Updated weights for policy 0, policy_version 37211 (0.0010) -[2024-07-05 13:21:35,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48332.7, 300 sec: 48207.8). Total num frames: 284844032. Throughput: 0: 12065.8. Samples: 8675220. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:35,962][03445] Avg episode reward: [(0, '51.918')] -[2024-07-05 13:21:37,274][03977] Updated weights for policy 0, policy_version 37221 (0.0008) -[2024-07-05 13:21:38,979][03977] Updated weights for policy 0, policy_version 37231 (0.0008) -[2024-07-05 13:21:40,718][03977] Updated weights for policy 0, policy_version 37241 (0.0008) -[2024-07-05 13:21:40,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.4, 300 sec: 48207.9). Total num frames: 285081600. Throughput: 0: 12062.0. Samples: 8747180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:40,962][03445] Avg episode reward: [(0, '51.688')] -[2024-07-05 13:21:42,420][03977] Updated weights for policy 0, policy_version 37251 (0.0011) -[2024-07-05 13:21:44,117][03977] Updated weights for policy 0, policy_version 37261 (0.0009) -[2024-07-05 13:21:45,842][03977] Updated weights for policy 0, policy_version 37271 (0.0008) -[2024-07-05 13:21:45,962][03445] Fps is (10 sec: 47513.3, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 285319168. Throughput: 0: 12058.2. Samples: 8819476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:45,963][03445] Avg episode reward: [(0, '54.107')] -[2024-07-05 13:21:47,511][03977] Updated weights for policy 0, policy_version 37281 (0.0008) -[2024-07-05 13:21:49,205][03977] Updated weights for policy 0, policy_version 37291 (0.0008) -[2024-07-05 13:21:50,883][03977] Updated weights for policy 0, policy_version 37301 (0.0008) -[2024-07-05 13:21:50,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 285564928. Throughput: 0: 12056.5. Samples: 8855704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:50,962][03445] Avg episode reward: [(0, '49.729')] -[2024-07-05 13:21:52,600][03977] Updated weights for policy 0, policy_version 37311 (0.0007) -[2024-07-05 13:21:54,340][03977] Updated weights for policy 0, policy_version 37321 (0.0011) -[2024-07-05 13:21:55,961][03445] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 285802496. Throughput: 0: 12041.0. Samples: 8927704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:21:55,962][03445] Avg episode reward: [(0, '53.014')] -[2024-07-05 13:21:56,090][03977] Updated weights for policy 0, policy_version 37331 (0.0010) -[2024-07-05 13:21:57,758][03977] Updated weights for policy 0, policy_version 37341 (0.0007) -[2024-07-05 13:21:59,463][03977] Updated weights for policy 0, policy_version 37351 (0.0008) -[2024-07-05 13:22:00,961][03445] Fps is (10 sec: 47513.8, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 286040064. Throughput: 0: 12032.5. Samples: 8999496. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:22:00,962][03445] Avg episode reward: [(0, '52.494')] -[2024-07-05 13:22:01,150][03977] Updated weights for policy 0, policy_version 37361 (0.0008) -[2024-07-05 13:22:02,857][03977] Updated weights for policy 0, policy_version 37371 (0.0008) -[2024-07-05 13:22:04,553][03977] Updated weights for policy 0, policy_version 37381 (0.0008) -[2024-07-05 13:22:05,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 286285824. Throughput: 0: 12044.0. Samples: 9035936. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:22:05,963][03445] Avg episode reward: [(0, '51.189')] -[2024-07-05 13:22:06,265][03977] Updated weights for policy 0, policy_version 37391 (0.0009) -[2024-07-05 13:22:07,935][03977] Updated weights for policy 0, policy_version 37401 (0.0008) -[2024-07-05 13:22:09,658][03977] Updated weights for policy 0, policy_version 37411 (0.0008) -[2024-07-05 13:22:10,961][03445] Fps is (10 sec: 48332.2, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 286523392. Throughput: 0: 12029.0. Samples: 9107872. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:22:10,962][03445] Avg episode reward: [(0, '50.696')] -[2024-07-05 13:22:11,373][03977] Updated weights for policy 0, policy_version 37421 (0.0008) -[2024-07-05 13:22:13,058][03977] Updated weights for policy 0, policy_version 37431 (0.0010) -[2024-07-05 13:22:14,751][03977] Updated weights for policy 0, policy_version 37441 (0.0011) -[2024-07-05 13:22:15,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 286769152. Throughput: 0: 12034.6. Samples: 9180424. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:22:15,963][03445] Avg episode reward: [(0, '52.604')] -[2024-07-05 13:22:16,443][03977] Updated weights for policy 0, policy_version 37451 (0.0008) -[2024-07-05 13:22:18,143][03977] Updated weights for policy 0, policy_version 37461 (0.0010) -[2024-07-05 13:22:19,861][03977] Updated weights for policy 0, policy_version 37471 (0.0007) -[2024-07-05 13:22:20,962][03445] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 48180.0). Total num frames: 287006720. Throughput: 0: 12031.3. Samples: 9216628. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:22:20,963][03445] Avg episode reward: [(0, '52.227')] -[2024-07-05 13:22:21,576][03977] Updated weights for policy 0, policy_version 37481 (0.0010) -[2024-07-05 13:22:23,251][03977] Updated weights for policy 0, policy_version 37491 (0.0008) -[2024-07-05 13:22:24,973][03977] Updated weights for policy 0, policy_version 37501 (0.0007) -[2024-07-05 13:22:25,961][03445] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 287244288. Throughput: 0: 12037.9. Samples: 9288884. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:22:25,962][03445] Avg episode reward: [(0, '52.347')] -[2024-07-05 13:22:26,651][03977] Updated weights for policy 0, policy_version 37511 (0.0008) -[2024-07-05 13:22:28,370][03977] Updated weights for policy 0, policy_version 37521 (0.0011) -[2024-07-05 13:22:30,046][03977] Updated weights for policy 0, policy_version 37531 (0.0010) -[2024-07-05 13:22:30,962][03445] Fps is (10 sec: 48333.3, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 287490048. Throughput: 0: 12036.9. Samples: 9361136. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:22:30,963][03445] Avg episode reward: [(0, '54.603')] -[2024-07-05 13:22:31,753][03977] Updated weights for policy 0, policy_version 37541 (0.0011) -[2024-07-05 13:22:33,433][03977] Updated weights for policy 0, policy_version 37551 (0.0010) -[2024-07-05 13:22:35,185][03977] Updated weights for policy 0, policy_version 37561 (0.0014) -[2024-07-05 13:22:35,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 287727616. Throughput: 0: 12032.7. Samples: 9397176. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:22:35,962][03445] Avg episode reward: [(0, '52.647')] -[2024-07-05 13:22:36,897][03977] Updated weights for policy 0, policy_version 37571 (0.0010) -[2024-07-05 13:22:38,625][03977] Updated weights for policy 0, policy_version 37581 (0.0009) -[2024-07-05 13:22:40,311][03977] Updated weights for policy 0, policy_version 37591 (0.0008) -[2024-07-05 13:22:40,961][03445] Fps is (10 sec: 47514.0, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 287965184. Throughput: 0: 12027.3. Samples: 9468932. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:22:40,963][03445] Avg episode reward: [(0, '52.491')] -[2024-07-05 13:22:42,029][03977] Updated weights for policy 0, policy_version 37601 (0.0008) -[2024-07-05 13:22:43,722][03977] Updated weights for policy 0, policy_version 37611 (0.0007) -[2024-07-05 13:22:45,416][03977] Updated weights for policy 0, policy_version 37621 (0.0008) -[2024-07-05 13:22:45,961][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 288210944. Throughput: 0: 12044.0. Samples: 9541476. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:22:45,963][03445] Avg episode reward: [(0, '53.535')] -[2024-07-05 13:22:47,072][03977] Updated weights for policy 0, policy_version 37631 (0.0009) -[2024-07-05 13:22:48,773][03977] Updated weights for policy 0, policy_version 37641 (0.0008) -[2024-07-05 13:22:50,476][03977] Updated weights for policy 0, policy_version 37651 (0.0008) -[2024-07-05 13:22:50,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 288448512. Throughput: 0: 12038.5. Samples: 9577668. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:22:50,963][03445] Avg episode reward: [(0, '53.158')] -[2024-07-05 13:22:52,176][03977] Updated weights for policy 0, policy_version 37661 (0.0010) -[2024-07-05 13:22:53,879][03977] Updated weights for policy 0, policy_version 37671 (0.0008) -[2024-07-05 13:22:55,561][03977] Updated weights for policy 0, policy_version 37681 (0.0007) -[2024-07-05 13:22:55,962][03445] Fps is (10 sec: 48332.3, 60 sec: 48196.1, 300 sec: 48180.0). Total num frames: 288694272. Throughput: 0: 12046.8. Samples: 9649980. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:22:55,962][03445] Avg episode reward: [(0, '53.513')] -[2024-07-05 13:22:57,253][03977] Updated weights for policy 0, policy_version 37691 (0.0008) -[2024-07-05 13:22:58,969][03977] Updated weights for policy 0, policy_version 37701 (0.0007) -[2024-07-05 13:23:00,687][03977] Updated weights for policy 0, policy_version 37711 (0.0007) -[2024-07-05 13:23:00,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 288931840. Throughput: 0: 12040.9. Samples: 9722264. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:00,963][03445] Avg episode reward: [(0, '51.751')] -[2024-07-05 13:23:02,411][03977] Updated weights for policy 0, policy_version 37721 (0.0008) -[2024-07-05 13:23:04,094][03977] Updated weights for policy 0, policy_version 37731 (0.0008) -[2024-07-05 13:23:05,775][03977] Updated weights for policy 0, policy_version 37741 (0.0008) -[2024-07-05 13:23:05,962][03445] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 289177600. Throughput: 0: 12037.0. Samples: 9758292. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:05,963][03445] Avg episode reward: [(0, '53.752')] -[2024-07-05 13:23:07,462][03977] Updated weights for policy 0, policy_version 37751 (0.0008) -[2024-07-05 13:23:09,188][03977] Updated weights for policy 0, policy_version 37761 (0.0008) -[2024-07-05 13:23:10,882][03977] Updated weights for policy 0, policy_version 37771 (0.0008) -[2024-07-05 13:23:10,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 289415168. Throughput: 0: 12037.2. Samples: 9830560. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:10,962][03445] Avg episode reward: [(0, '51.899')] -[2024-07-05 13:23:12,596][03977] Updated weights for policy 0, policy_version 37781 (0.0008) -[2024-07-05 13:23:14,298][03977] Updated weights for policy 0, policy_version 37791 (0.0007) -[2024-07-05 13:23:15,962][03445] Fps is (10 sec: 47513.4, 60 sec: 48059.6, 300 sec: 48152.3). Total num frames: 289652736. Throughput: 0: 12039.1. Samples: 9902896. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:15,963][03445] Avg episode reward: [(0, '52.306')] -[2024-07-05 13:23:16,015][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000037801_289660928.pth... -[2024-07-05 13:23:16,019][03977] Updated weights for policy 0, policy_version 37801 (0.0008) -[2024-07-05 13:23:16,085][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000036389_278093824.pth -[2024-07-05 13:23:17,744][03977] Updated weights for policy 0, policy_version 37811 (0.0008) -[2024-07-05 13:23:19,448][03977] Updated weights for policy 0, policy_version 37821 (0.0008) -[2024-07-05 13:23:20,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 289898496. Throughput: 0: 12031.6. Samples: 9938596. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:20,963][03445] Avg episode reward: [(0, '54.534')] -[2024-07-05 13:23:21,161][03977] Updated weights for policy 0, policy_version 37831 (0.0010) -[2024-07-05 13:23:22,823][03977] Updated weights for policy 0, policy_version 37841 (0.0008) -[2024-07-05 13:23:24,531][03977] Updated weights for policy 0, policy_version 37851 (0.0008) -[2024-07-05 13:23:25,962][03445] Fps is (10 sec: 48332.4, 60 sec: 48196.1, 300 sec: 48180.0). Total num frames: 290136064. Throughput: 0: 12039.2. Samples: 10010700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:25,963][03445] Avg episode reward: [(0, '53.407')] -[2024-07-05 13:23:26,255][03977] Updated weights for policy 0, policy_version 37861 (0.0008) -[2024-07-05 13:23:27,949][03977] Updated weights for policy 0, policy_version 37871 (0.0008) -[2024-07-05 13:23:29,663][03977] Updated weights for policy 0, policy_version 37881 (0.0011) -[2024-07-05 13:23:30,961][03445] Fps is (10 sec: 47513.3, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 290373632. Throughput: 0: 12018.0. Samples: 10082284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:23:30,963][03445] Avg episode reward: [(0, '53.428')] -[2024-07-05 13:23:31,372][03977] Updated weights for policy 0, policy_version 37891 (0.0008) -[2024-07-05 13:23:33,093][03977] Updated weights for policy 0, policy_version 37901 (0.0012) -[2024-07-05 13:23:34,772][03977] Updated weights for policy 0, policy_version 37911 (0.0008) -[2024-07-05 13:23:35,961][03445] Fps is (10 sec: 47514.2, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 290611200. Throughput: 0: 12021.0. Samples: 10118616. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:23:35,962][03445] Avg episode reward: [(0, '54.163')] -[2024-07-05 13:23:36,458][03977] Updated weights for policy 0, policy_version 37921 (0.0007) -[2024-07-05 13:23:38,194][03977] Updated weights for policy 0, policy_version 37931 (0.0008) -[2024-07-05 13:23:39,864][03977] Updated weights for policy 0, policy_version 37941 (0.0007) -[2024-07-05 13:23:40,965][03445] Fps is (10 sec: 48317.5, 60 sec: 48193.7, 300 sec: 48151.8). Total num frames: 290856960. Throughput: 0: 12020.2. Samples: 10190924. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:23:40,966][03445] Avg episode reward: [(0, '52.222')] -[2024-07-05 13:23:41,573][03977] Updated weights for policy 0, policy_version 37951 (0.0008) -[2024-07-05 13:23:43,241][03977] Updated weights for policy 0, policy_version 37961 (0.0008) -[2024-07-05 13:23:44,944][03977] Updated weights for policy 0, policy_version 37971 (0.0008) -[2024-07-05 13:23:45,961][03445] Fps is (10 sec: 48333.1, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 291094528. Throughput: 0: 12020.4. Samples: 10263180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:23:45,963][03445] Avg episode reward: [(0, '54.066')] -[2024-07-05 13:23:46,655][03977] Updated weights for policy 0, policy_version 37981 (0.0011) -[2024-07-05 13:23:48,400][03977] Updated weights for policy 0, policy_version 37991 (0.0008) -[2024-07-05 13:23:50,077][03977] Updated weights for policy 0, policy_version 38001 (0.0010) -[2024-07-05 13:23:50,961][03445] Fps is (10 sec: 48348.2, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 291340288. Throughput: 0: 12024.8. Samples: 10299408. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:23:50,963][03445] Avg episode reward: [(0, '50.822')] -[2024-07-05 13:23:51,802][03977] Updated weights for policy 0, policy_version 38011 (0.0008) -[2024-07-05 13:23:53,501][03977] Updated weights for policy 0, policy_version 38021 (0.0009) -[2024-07-05 13:23:55,190][03977] Updated weights for policy 0, policy_version 38031 (0.0008) -[2024-07-05 13:23:55,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48059.9, 300 sec: 48152.3). Total num frames: 291577856. Throughput: 0: 12014.7. Samples: 10371220. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:23:55,962][03445] Avg episode reward: [(0, '54.116')] -[2024-07-05 13:23:56,870][03977] Updated weights for policy 0, policy_version 38041 (0.0008) -[2024-07-05 13:23:58,545][03977] Updated weights for policy 0, policy_version 38051 (0.0008) -[2024-07-05 13:24:00,245][03977] Updated weights for policy 0, policy_version 38061 (0.0008) -[2024-07-05 13:24:00,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 291823616. Throughput: 0: 12018.8. Samples: 10443740. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:24:00,962][03445] Avg episode reward: [(0, '53.582')] -[2024-07-05 13:24:01,966][03977] Updated weights for policy 0, policy_version 38071 (0.0008) -[2024-07-05 13:24:03,662][03977] Updated weights for policy 0, policy_version 38081 (0.0008) -[2024-07-05 13:24:05,372][03977] Updated weights for policy 0, policy_version 38091 (0.0009) -[2024-07-05 13:24:05,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 292061184. Throughput: 0: 12028.2. Samples: 10479864. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:24:05,962][03445] Avg episode reward: [(0, '52.272')] -[2024-07-05 13:24:07,078][03977] Updated weights for policy 0, policy_version 38101 (0.0010) -[2024-07-05 13:24:08,766][03977] Updated weights for policy 0, policy_version 38111 (0.0007) -[2024-07-05 13:24:10,496][03977] Updated weights for policy 0, policy_version 38121 (0.0011) -[2024-07-05 13:24:10,961][03445] Fps is (10 sec: 47514.0, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 292298752. Throughput: 0: 12037.5. Samples: 10552384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:24:10,962][03445] Avg episode reward: [(0, '52.236')] -[2024-07-05 13:24:12,190][03977] Updated weights for policy 0, policy_version 38131 (0.0008) -[2024-07-05 13:24:13,872][03977] Updated weights for policy 0, policy_version 38141 (0.0008) -[2024-07-05 13:24:15,593][03977] Updated weights for policy 0, policy_version 38151 (0.0008) -[2024-07-05 13:24:15,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 292544512. Throughput: 0: 12049.7. Samples: 10624520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:24:15,962][03445] Avg episode reward: [(0, '52.884')] -[2024-07-05 13:24:17,318][03977] Updated weights for policy 0, policy_version 38161 (0.0008) -[2024-07-05 13:24:19,054][03977] Updated weights for policy 0, policy_version 38171 (0.0008) -[2024-07-05 13:24:20,772][03977] Updated weights for policy 0, policy_version 38181 (0.0009) -[2024-07-05 13:24:20,961][03445] Fps is (10 sec: 48332.3, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 292782080. Throughput: 0: 12036.9. Samples: 10660276. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:24:20,962][03445] Avg episode reward: [(0, '52.163')] -[2024-07-05 13:24:22,384][03977] Updated weights for policy 0, policy_version 38191 (0.0015) -[2024-07-05 13:24:24,069][03977] Updated weights for policy 0, policy_version 38201 (0.0008) -[2024-07-05 13:24:25,799][03977] Updated weights for policy 0, policy_version 38211 (0.0007) -[2024-07-05 13:24:25,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 293027840. Throughput: 0: 12035.3. Samples: 10732472. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:24:25,962][03445] Avg episode reward: [(0, '53.288')] -[2024-07-05 13:24:27,515][03977] Updated weights for policy 0, policy_version 38221 (0.0009) -[2024-07-05 13:24:29,237][03977] Updated weights for policy 0, policy_version 38231 (0.0008) -[2024-07-05 13:24:30,906][03977] Updated weights for policy 0, policy_version 38241 (0.0007) -[2024-07-05 13:24:30,961][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 293265408. Throughput: 0: 12042.8. Samples: 10805104. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:24:30,963][03445] Avg episode reward: [(0, '53.365')] -[2024-07-05 13:24:32,611][03977] Updated weights for policy 0, policy_version 38251 (0.0008) -[2024-07-05 13:24:34,326][03977] Updated weights for policy 0, policy_version 38261 (0.0008) -[2024-07-05 13:24:35,961][03445] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 293502976. Throughput: 0: 12031.0. Samples: 10840804. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:24:35,962][03445] Avg episode reward: [(0, '53.551')] -[2024-07-05 13:24:36,037][03977] Updated weights for policy 0, policy_version 38271 (0.0008) -[2024-07-05 13:24:37,744][03977] Updated weights for policy 0, policy_version 38281 (0.0009) -[2024-07-05 13:24:39,411][03977] Updated weights for policy 0, policy_version 38291 (0.0008) -[2024-07-05 13:24:40,961][03445] Fps is (10 sec: 47513.4, 60 sec: 48062.3, 300 sec: 48152.3). Total num frames: 293740544. Throughput: 0: 12037.9. Samples: 10912924. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:24:40,962][03445] Avg episode reward: [(0, '52.827')] -[2024-07-05 13:24:41,150][03977] Updated weights for policy 0, policy_version 38301 (0.0011) -[2024-07-05 13:24:42,811][03977] Updated weights for policy 0, policy_version 38311 (0.0009) -[2024-07-05 13:24:44,492][03977] Updated weights for policy 0, policy_version 38321 (0.0008) -[2024-07-05 13:24:45,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 293986304. Throughput: 0: 12039.4. Samples: 10985512. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:24:45,963][03445] Avg episode reward: [(0, '52.497')] -[2024-07-05 13:24:46,227][03977] Updated weights for policy 0, policy_version 38331 (0.0008) -[2024-07-05 13:24:47,946][03977] Updated weights for policy 0, policy_version 38341 (0.0010) -[2024-07-05 13:24:49,666][03977] Updated weights for policy 0, policy_version 38351 (0.0008) -[2024-07-05 13:24:50,962][03445] Fps is (10 sec: 48332.3, 60 sec: 48059.6, 300 sec: 48152.3). Total num frames: 294223872. Throughput: 0: 12032.0. Samples: 11021308. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:24:50,963][03445] Avg episode reward: [(0, '52.806')] -[2024-07-05 13:24:51,356][03977] Updated weights for policy 0, policy_version 38361 (0.0010) -[2024-07-05 13:24:53,050][03977] Updated weights for policy 0, policy_version 38371 (0.0009) -[2024-07-05 13:24:54,743][03977] Updated weights for policy 0, policy_version 38381 (0.0008) -[2024-07-05 13:24:55,961][03445] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 48152.3). Total num frames: 294469632. Throughput: 0: 12035.8. Samples: 11093996. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:24:55,963][03445] Avg episode reward: [(0, '51.065')] -[2024-07-05 13:24:56,459][03977] Updated weights for policy 0, policy_version 38391 (0.0008) -[2024-07-05 13:24:58,159][03977] Updated weights for policy 0, policy_version 38401 (0.0009) -[2024-07-05 13:24:59,824][03977] Updated weights for policy 0, policy_version 38411 (0.0008) -[2024-07-05 13:25:00,961][03445] Fps is (10 sec: 48333.3, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 294707200. Throughput: 0: 12032.6. Samples: 11165988. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:25:00,963][03445] Avg episode reward: [(0, '51.976')] -[2024-07-05 13:25:01,531][03977] Updated weights for policy 0, policy_version 38421 (0.0010) -[2024-07-05 13:25:03,237][03977] Updated weights for policy 0, policy_version 38431 (0.0008) -[2024-07-05 13:25:04,914][03977] Updated weights for policy 0, policy_version 38441 (0.0008) -[2024-07-05 13:25:05,962][03445] Fps is (10 sec: 48332.2, 60 sec: 48196.1, 300 sec: 48180.0). Total num frames: 294952960. Throughput: 0: 12042.5. Samples: 11202192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:25:05,963][03445] Avg episode reward: [(0, '49.785')] -[2024-07-05 13:25:06,605][03977] Updated weights for policy 0, policy_version 38451 (0.0010) -[2024-07-05 13:25:08,298][03977] Updated weights for policy 0, policy_version 38461 (0.0008) -[2024-07-05 13:25:10,006][03977] Updated weights for policy 0, policy_version 38471 (0.0008) -[2024-07-05 13:25:10,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48152.4). Total num frames: 295190528. Throughput: 0: 12047.8. Samples: 11274624. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:25:10,963][03445] Avg episode reward: [(0, '53.219')] -[2024-07-05 13:25:11,699][03977] Updated weights for policy 0, policy_version 38481 (0.0008) -[2024-07-05 13:25:13,412][03977] Updated weights for policy 0, policy_version 38491 (0.0010) -[2024-07-05 13:25:15,102][03977] Updated weights for policy 0, policy_version 38501 (0.0007) -[2024-07-05 13:25:15,961][03445] Fps is (10 sec: 48333.5, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 295436288. Throughput: 0: 12038.7. Samples: 11346844. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:25:15,963][03445] Avg episode reward: [(0, '54.658')] -[2024-07-05 13:25:15,966][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000038506_295436288.pth... -[2024-07-05 13:25:16,032][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000037095_283877376.pth -[2024-07-05 13:25:16,807][03977] Updated weights for policy 0, policy_version 38511 (0.0008) -[2024-07-05 13:25:18,550][03977] Updated weights for policy 0, policy_version 38521 (0.0008) -[2024-07-05 13:25:20,262][03977] Updated weights for policy 0, policy_version 38531 (0.0007) -[2024-07-05 13:25:20,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 295673856. Throughput: 0: 12050.3. Samples: 11383068. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:25:20,963][03445] Avg episode reward: [(0, '54.345')] -[2024-07-05 13:25:21,914][03977] Updated weights for policy 0, policy_version 38541 (0.0011) -[2024-07-05 13:25:23,649][03977] Updated weights for policy 0, policy_version 38551 (0.0008) -[2024-07-05 13:25:25,355][03977] Updated weights for policy 0, policy_version 38561 (0.0009) -[2024-07-05 13:25:25,961][03445] Fps is (10 sec: 47513.7, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 295911424. Throughput: 0: 12041.6. Samples: 11454796. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:25:25,963][03445] Avg episode reward: [(0, '51.922')] -[2024-07-05 13:25:27,079][03977] Updated weights for policy 0, policy_version 38571 (0.0008) -[2024-07-05 13:25:28,756][03977] Updated weights for policy 0, policy_version 38581 (0.0008) -[2024-07-05 13:25:30,443][03977] Updated weights for policy 0, policy_version 38591 (0.0008) -[2024-07-05 13:25:30,961][03445] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 296157184. Throughput: 0: 12042.0. Samples: 11527400. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 13:25:30,962][03445] Avg episode reward: [(0, '52.085')] -[2024-07-05 13:25:32,123][03977] Updated weights for policy 0, policy_version 38601 (0.0008) -[2024-07-05 13:25:33,835][03977] Updated weights for policy 0, policy_version 38611 (0.0009) -[2024-07-05 13:25:35,516][03977] Updated weights for policy 0, policy_version 38621 (0.0010) -[2024-07-05 13:25:35,962][03445] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 48152.3). Total num frames: 296394752. Throughput: 0: 12047.3. Samples: 11563436. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:25:35,962][03445] Avg episode reward: [(0, '52.036')] -[2024-07-05 13:25:37,228][03977] Updated weights for policy 0, policy_version 38631 (0.0007) -[2024-07-05 13:25:38,929][03977] Updated weights for policy 0, policy_version 38641 (0.0008) -[2024-07-05 13:25:40,683][03977] Updated weights for policy 0, policy_version 38651 (0.0008) -[2024-07-05 13:25:40,962][03445] Fps is (10 sec: 47513.0, 60 sec: 48196.2, 300 sec: 48152.3). Total num frames: 296632320. Throughput: 0: 12028.2. Samples: 11635264. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:25:40,963][03445] Avg episode reward: [(0, '52.565')] -[2024-07-05 13:25:42,352][03977] Updated weights for policy 0, policy_version 38661 (0.0007) -[2024-07-05 13:25:44,089][03977] Updated weights for policy 0, policy_version 38671 (0.0008) -[2024-07-05 13:25:45,780][03977] Updated weights for policy 0, policy_version 38681 (0.0008) -[2024-07-05 13:25:45,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 296878080. Throughput: 0: 12038.4. Samples: 11707716. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:25:45,962][03445] Avg episode reward: [(0, '52.545')] -[2024-07-05 13:25:47,497][03977] Updated weights for policy 0, policy_version 38691 (0.0008) -[2024-07-05 13:25:49,173][03977] Updated weights for policy 0, policy_version 38701 (0.0008) -[2024-07-05 13:25:50,872][03977] Updated weights for policy 0, policy_version 38711 (0.0007) -[2024-07-05 13:25:50,961][03445] Fps is (10 sec: 48333.4, 60 sec: 48196.4, 300 sec: 48152.3). Total num frames: 297115648. Throughput: 0: 12032.1. Samples: 11743632. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:25:50,962][03445] Avg episode reward: [(0, '53.388')] -[2024-07-05 13:25:52,594][03977] Updated weights for policy 0, policy_version 38721 (0.0008) -[2024-07-05 13:25:54,295][03977] Updated weights for policy 0, policy_version 38731 (0.0007) -[2024-07-05 13:25:55,961][03445] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48124.5). Total num frames: 297353216. Throughput: 0: 12021.2. Samples: 11815580. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:25:55,963][03445] Avg episode reward: [(0, '52.068')] -[2024-07-05 13:25:55,969][03977] Updated weights for policy 0, policy_version 38741 (0.0009) -[2024-07-05 13:25:57,642][03977] Updated weights for policy 0, policy_version 38751 (0.0007) -[2024-07-05 13:25:59,356][03977] Updated weights for policy 0, policy_version 38761 (0.0008) -[2024-07-05 13:26:00,961][03445] Fps is (10 sec: 48332.4, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 297598976. Throughput: 0: 12030.0. Samples: 11888196. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:26:00,962][03445] Avg episode reward: [(0, '52.093')] -[2024-07-05 13:26:01,095][03977] Updated weights for policy 0, policy_version 38771 (0.0007) -[2024-07-05 13:26:02,812][03977] Updated weights for policy 0, policy_version 38781 (0.0008) -[2024-07-05 13:26:04,511][03977] Updated weights for policy 0, policy_version 38791 (0.0008) -[2024-07-05 13:26:05,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48059.9, 300 sec: 48152.3). Total num frames: 297836544. Throughput: 0: 12021.2. Samples: 11924020. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:26:05,962][03445] Avg episode reward: [(0, '52.423')] -[2024-07-05 13:26:06,209][03977] Updated weights for policy 0, policy_version 38801 (0.0008) -[2024-07-05 13:26:07,900][03977] Updated weights for policy 0, policy_version 38811 (0.0009) -[2024-07-05 13:26:09,601][03977] Updated weights for policy 0, policy_version 38821 (0.0008) -[2024-07-05 13:26:10,961][03445] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 298082304. Throughput: 0: 12034.4. Samples: 11996344. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:26:10,963][03445] Avg episode reward: [(0, '52.202')] -[2024-07-05 13:26:11,293][03977] Updated weights for policy 0, policy_version 38831 (0.0008) -[2024-07-05 13:26:13,012][03977] Updated weights for policy 0, policy_version 38841 (0.0008) -[2024-07-05 13:26:14,764][03977] Updated weights for policy 0, policy_version 38851 (0.0008) -[2024-07-05 13:26:15,961][03445] Fps is (10 sec: 48332.5, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 298319872. Throughput: 0: 12016.8. Samples: 12068156. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:26:15,963][03445] Avg episode reward: [(0, '51.412')] -[2024-07-05 13:26:16,474][03977] Updated weights for policy 0, policy_version 38861 (0.0008) -[2024-07-05 13:26:18,196][03977] Updated weights for policy 0, policy_version 38871 (0.0008) -[2024-07-05 13:26:19,863][03977] Updated weights for policy 0, policy_version 38881 (0.0008) -[2024-07-05 13:26:20,962][03445] Fps is (10 sec: 47513.4, 60 sec: 48059.7, 300 sec: 48124.5). Total num frames: 298557440. Throughput: 0: 12012.4. Samples: 12103992. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:26:20,963][03445] Avg episode reward: [(0, '53.604')] -[2024-07-05 13:26:21,541][03977] Updated weights for policy 0, policy_version 38891 (0.0008) -[2024-07-05 13:26:23,262][03977] Updated weights for policy 0, policy_version 38901 (0.0009) -[2024-07-05 13:26:24,949][03977] Updated weights for policy 0, policy_version 38911 (0.0007) -[2024-07-05 13:26:25,962][03445] Fps is (10 sec: 47513.4, 60 sec: 48059.7, 300 sec: 48124.5). Total num frames: 298795008. Throughput: 0: 12020.4. Samples: 12176184. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:26:25,963][03445] Avg episode reward: [(0, '53.661')] -[2024-07-05 13:26:26,663][03977] Updated weights for policy 0, policy_version 38921 (0.0008) -[2024-07-05 13:26:28,402][03977] Updated weights for policy 0, policy_version 38931 (0.0007) -[2024-07-05 13:26:30,064][03977] Updated weights for policy 0, policy_version 38941 (0.0008) -[2024-07-05 13:26:30,961][03445] Fps is (10 sec: 48333.3, 60 sec: 48059.7, 300 sec: 48124.5). Total num frames: 299040768. Throughput: 0: 12025.7. Samples: 12248872. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:26:30,962][03445] Avg episode reward: [(0, '52.767')] -[2024-07-05 13:26:31,766][03977] Updated weights for policy 0, policy_version 38951 (0.0007) -[2024-07-05 13:26:33,451][03977] Updated weights for policy 0, policy_version 38961 (0.0008) -[2024-07-05 13:26:35,177][03977] Updated weights for policy 0, policy_version 38971 (0.0007) -[2024-07-05 13:26:35,961][03445] Fps is (10 sec: 48333.2, 60 sec: 48059.8, 300 sec: 48124.5). Total num frames: 299278336. Throughput: 0: 12022.7. Samples: 12284652. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:26:35,962][03445] Avg episode reward: [(0, '50.962')] -[2024-07-05 13:26:36,895][03977] Updated weights for policy 0, policy_version 38981 (0.0010) -[2024-07-05 13:26:38,623][03977] Updated weights for policy 0, policy_version 38991 (0.0009) -[2024-07-05 13:26:40,346][03977] Updated weights for policy 0, policy_version 39001 (0.0010) -[2024-07-05 13:26:40,961][03445] Fps is (10 sec: 47513.3, 60 sec: 48059.8, 300 sec: 48124.5). Total num frames: 299515904. Throughput: 0: 12013.5. Samples: 12356188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:26:40,962][03445] Avg episode reward: [(0, '53.595')] -[2024-07-05 13:26:42,041][03977] Updated weights for policy 0, policy_version 39011 (0.0010) -[2024-07-05 13:26:43,763][03977] Updated weights for policy 0, policy_version 39021 (0.0009) -[2024-07-05 13:26:45,579][03977] Updated weights for policy 0, policy_version 39031 (0.0011) -[2024-07-05 13:26:45,961][03445] Fps is (10 sec: 47513.5, 60 sec: 47923.2, 300 sec: 48096.8). Total num frames: 299753472. Throughput: 0: 11982.9. Samples: 12427428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:26:45,963][03445] Avg episode reward: [(0, '54.726')] -[2024-07-05 13:26:47,296][03977] Updated weights for policy 0, policy_version 39041 (0.0007) -[2024-07-05 13:26:49,025][03977] Updated weights for policy 0, policy_version 39051 (0.0011) -[2024-07-05 13:26:50,762][03977] Updated weights for policy 0, policy_version 39061 (0.0008) -[2024-07-05 13:26:50,961][03445] Fps is (10 sec: 47513.8, 60 sec: 47923.2, 300 sec: 48096.8). Total num frames: 299991040. Throughput: 0: 11974.8. Samples: 12462888. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:26:50,962][03445] Avg episode reward: [(0, '53.140')] -[2024-07-05 13:26:51,464][03957] Stopping Batcher_0... -[2024-07-05 13:26:51,465][03957] Loop batcher_evt_loop terminating... -[2024-07-05 13:26:51,464][03445] Component Batcher_0 stopped! -[2024-07-05 13:26:51,466][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039065_300015616.pth... -[2024-07-05 13:26:51,493][03445] Component RolloutWorker_w2 stopped! -[2024-07-05 13:26:51,494][04008] Stopping RolloutWorker_w15... -[2024-07-05 13:26:51,494][03984] Stopping RolloutWorker_w6... -[2024-07-05 13:26:51,494][03984] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 13:26:51,494][03980] Stopping RolloutWorker_w2... -[2024-07-05 13:26:51,494][03982] Stopping RolloutWorker_w4... -[2024-07-05 13:26:51,494][04004] Stopping RolloutWorker_w11... -[2024-07-05 13:26:51,494][03445] Component RolloutWorker_w15 stopped! -[2024-07-05 13:26:51,495][04004] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 13:26:51,495][03982] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 13:26:51,495][04008] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 13:26:51,495][03980] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 13:26:51,495][03988] Stopping RolloutWorker_w9... -[2024-07-05 13:26:51,495][03979] Stopping RolloutWorker_w1... -[2024-07-05 13:26:51,495][03978] Stopping RolloutWorker_w0... -[2024-07-05 13:26:51,495][03445] Component RolloutWorker_w6 stopped! -[2024-07-05 13:26:51,496][03988] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 13:26:51,496][03979] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 13:26:51,496][03978] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 13:26:51,496][03445] Component RolloutWorker_w4 stopped! -[2024-07-05 13:26:51,497][03445] Component RolloutWorker_w11 stopped! -[2024-07-05 13:26:51,497][04006] Stopping RolloutWorker_w12... -[2024-07-05 13:26:51,497][03983] Stopping RolloutWorker_w5... -[2024-07-05 13:26:51,498][04006] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 13:26:51,498][03987] Stopping RolloutWorker_w10... -[2024-07-05 13:26:51,497][03445] Component RolloutWorker_w9 stopped! -[2024-07-05 13:26:51,498][03985] Stopping RolloutWorker_w7... -[2024-07-05 13:26:51,498][03987] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 13:26:51,498][03983] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 13:26:51,498][03985] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 13:26:51,498][03445] Component RolloutWorker_w1 stopped! -[2024-07-05 13:26:51,499][03445] Component RolloutWorker_w0 stopped! -[2024-07-05 13:26:51,500][03445] Component RolloutWorker_w5 stopped! -[2024-07-05 13:26:51,500][03445] Component RolloutWorker_w12 stopped! -[2024-07-05 13:26:51,501][03445] Component RolloutWorker_w10 stopped! -[2024-07-05 13:26:51,501][03445] Component RolloutWorker_w7 stopped! -[2024-07-05 13:26:51,503][03986] Stopping RolloutWorker_w8... -[2024-07-05 13:26:51,504][03986] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 13:26:51,503][03445] Component RolloutWorker_w8 stopped! -[2024-07-05 13:26:51,508][03445] Component RolloutWorker_w14 stopped! -[2024-07-05 13:26:51,517][04005] Stopping RolloutWorker_w13... -[2024-07-05 13:26:51,518][04005] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 13:26:51,508][04007] Stopping RolloutWorker_w14... -[2024-07-05 13:26:51,518][04007] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 13:26:51,517][03445] Component RolloutWorker_w13 stopped! -[2024-07-05 13:26:51,536][03977] Weights refcount: 2 0 -[2024-07-05 13:26:51,537][03977] Stopping InferenceWorker_p0-w0... -[2024-07-05 13:26:51,538][03977] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 13:26:51,538][03445] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 13:26:51,542][03957] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000037801_289660928.pth -[2024-07-05 13:26:51,569][03981] Stopping RolloutWorker_w3... -[2024-07-05 13:26:51,569][03981] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 13:26:51,569][03445] Component RolloutWorker_w3 stopped! -[2024-07-05 13:26:51,568][03957] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039065_300015616.pth... -[2024-07-05 13:26:51,680][03445] Component LearnerWorker_p0 stopped! -[2024-07-05 13:26:51,682][03445] Waiting for process learner_proc0 to stop... -[2024-07-05 13:26:51,680][03957] Stopping LearnerWorker_p0... -[2024-07-05 13:26:51,688][03957] Loop learner_proc0_evt_loop terminating... -[2024-07-05 13:26:53,020][03445] Waiting for process inference_proc0-0 to join... -[2024-07-05 13:26:53,021][03445] Waiting for process rollout_proc0 to join... -[2024-07-05 13:26:53,021][03445] Waiting for process rollout_proc1 to join... -[2024-07-05 13:26:53,021][03445] Waiting for process rollout_proc2 to join... -[2024-07-05 13:26:53,022][03445] Waiting for process rollout_proc3 to join... -[2024-07-05 13:26:53,022][03445] Waiting for process rollout_proc4 to join... -[2024-07-05 13:26:53,023][03445] Waiting for process rollout_proc5 to join... -[2024-07-05 13:26:53,023][03445] Waiting for process rollout_proc6 to join... -[2024-07-05 13:26:53,023][03445] Waiting for process rollout_proc7 to join... -[2024-07-05 13:26:53,024][03445] Waiting for process rollout_proc8 to join... -[2024-07-05 13:26:53,024][03445] Waiting for process rollout_proc9 to join... -[2024-07-05 13:26:53,025][03445] Waiting for process rollout_proc10 to join... -[2024-07-05 13:26:53,025][03445] Waiting for process rollout_proc11 to join... -[2024-07-05 13:26:53,025][03445] Waiting for process rollout_proc12 to join... -[2024-07-05 13:26:53,026][03445] Waiting for process rollout_proc13 to join... -[2024-07-05 13:26:53,037][03445] Waiting for process rollout_proc14 to join... -[2024-07-05 13:26:53,037][03445] Waiting for process rollout_proc15 to join... -[2024-07-05 13:26:53,038][03445] Batcher 0 profile tree view: -batching: 74.5920, releasing_batches: 0.1372 -[2024-07-05 13:26:53,038][03445] InferenceWorker_p0-w0 profile tree view: +[2024-07-05 10:29:05,020][17912] Worker 0 uses CPU cores [0, 1] +[2024-07-05 10:29:05,142][17911] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:29:05,142][17911] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-07-05 10:29:05,156][17898] Using optimizer +[2024-07-05 10:29:05,182][17916] Worker 4 uses CPU cores [8, 9] +[2024-07-05 10:29:05,187][17911] Num visible devices: 1 +[2024-07-05 10:29:05,256][17918] Worker 6 uses CPU cores [12, 13] +[2024-07-05 10:29:05,675][17898] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth... +[2024-07-05 10:29:05,746][17898] Loading model from checkpoint +[2024-07-05 10:29:05,748][17898] Loaded experiment state at self.train_step=1222, self.env_steps=5005312 +[2024-07-05 10:29:05,748][17898] Initialized policy 0 weights for model version 1222 +[2024-07-05 10:29:05,749][17898] LearnerWorker_p0 finished initialization! +[2024-07-05 10:29:05,749][17898] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:29:05,817][17911] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:29:05,819][17911] RunningMeanStd input shape: (1,) +[2024-07-05 10:29:05,826][17911] Num input channels: 3 +[2024-07-05 10:29:05,836][17911] Convolutional layer output size: 4608 +[2024-07-05 10:29:05,847][17911] Policy head output size: 512 +[2024-07-05 10:29:05,973][17621] Inference worker 0-0 is ready! +[2024-07-05 10:29:05,974][17621] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 10:29:06,014][17918] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,014][17917] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,014][17915] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,014][17912] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,015][17913] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,016][17914] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,016][17919] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,017][17916] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:29:06,595][17919] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,595][17914] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,595][17917] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,595][17915] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,595][17918] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,595][17913] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,754][17621] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 5005312. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:29:06,785][17919] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,786][17917] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,786][17915] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,786][17918] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,787][17914] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,796][17916] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,797][17912] Decorrelating experience for 0 frames... +[2024-07-05 10:29:06,976][17912] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,976][17916] Decorrelating experience for 32 frames... +[2024-07-05 10:29:06,976][17913] Decorrelating experience for 32 frames... +[2024-07-05 10:29:07,011][17919] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,015][17917] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,020][17918] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,163][17914] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,197][17913] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,197][17912] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,199][17916] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,211][17917] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,216][17915] Decorrelating experience for 64 frames... +[2024-07-05 10:29:07,385][17913] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,385][17916] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,404][17914] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,406][17915] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,466][17919] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,583][17912] Decorrelating experience for 96 frames... +[2024-07-05 10:29:07,655][17918] Decorrelating experience for 96 frames... +[2024-07-05 10:29:08,255][17898] Signal inference workers to stop experience collection... +[2024-07-05 10:29:08,262][17911] InferenceWorker_p0-w0: stopping experience collection +[2024-07-05 10:29:11,754][17621] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 5005312. Throughput: 0: 432.0. Samples: 2160. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:29:11,756][17621] Avg episode reward: [(0, '1.390')] +[2024-07-05 10:29:11,821][17898] Signal inference workers to resume experience collection... +[2024-07-05 10:29:11,822][17898] Stopping Batcher_0... +[2024-07-05 10:29:11,822][17898] Loop batcher_evt_loop terminating... +[2024-07-05 10:29:11,826][17621] Component Batcher_0 stopped! +[2024-07-05 10:29:11,834][17913] Stopping RolloutWorker_w1... +[2024-07-05 10:29:11,834][17917] Stopping RolloutWorker_w5... +[2024-07-05 10:29:11,834][17913] Loop rollout_proc1_evt_loop terminating... +[2024-07-05 10:29:11,834][17919] Stopping RolloutWorker_w7... +[2024-07-05 10:29:11,834][17915] Stopping RolloutWorker_w3... +[2024-07-05 10:29:11,834][17918] Stopping RolloutWorker_w6... +[2024-07-05 10:29:11,834][17914] Stopping RolloutWorker_w2... +[2024-07-05 10:29:11,834][17917] Loop rollout_proc5_evt_loop terminating... +[2024-07-05 10:29:11,834][17919] Loop rollout_proc7_evt_loop terminating... +[2024-07-05 10:29:11,834][17914] Loop rollout_proc2_evt_loop terminating... +[2024-07-05 10:29:11,834][17918] Loop rollout_proc6_evt_loop terminating... +[2024-07-05 10:29:11,834][17915] Loop rollout_proc3_evt_loop terminating... +[2024-07-05 10:29:11,835][17916] Stopping RolloutWorker_w4... +[2024-07-05 10:29:11,834][17621] Component RolloutWorker_w1 stopped! +[2024-07-05 10:29:11,835][17916] Loop rollout_proc4_evt_loop terminating... +[2024-07-05 10:29:11,836][17912] Stopping RolloutWorker_w0... +[2024-07-05 10:29:11,836][17912] Loop rollout_proc0_evt_loop terminating... +[2024-07-05 10:29:11,836][17621] Component RolloutWorker_w5 stopped! +[2024-07-05 10:29:11,836][17621] Component RolloutWorker_w7 stopped! +[2024-07-05 10:29:11,838][17621] Component RolloutWorker_w3 stopped! +[2024-07-05 10:29:11,839][17621] Component RolloutWorker_w6 stopped! +[2024-07-05 10:29:11,840][17621] Component RolloutWorker_w2 stopped! +[2024-07-05 10:29:11,840][17621] Component RolloutWorker_w4 stopped! +[2024-07-05 10:29:11,841][17621] Component RolloutWorker_w0 stopped! +[2024-07-05 10:29:11,849][17911] Weights refcount: 2 0 +[2024-07-05 10:29:11,851][17911] Stopping InferenceWorker_p0-w0... +[2024-07-05 10:29:11,851][17911] Loop inference_proc0-0_evt_loop terminating... +[2024-07-05 10:29:11,851][17621] Component InferenceWorker_p0-w0 stopped! +[2024-07-05 10:29:12,570][17898] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001224_5013504.pth... +[2024-07-05 10:29:12,667][17898] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001117_4575232.pth +[2024-07-05 10:29:12,668][17898] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001224_5013504.pth... +[2024-07-05 10:29:12,765][17898] Stopping LearnerWorker_p0... +[2024-07-05 10:29:12,765][17898] Loop learner_proc0_evt_loop terminating... +[2024-07-05 10:29:12,765][17621] Component LearnerWorker_p0 stopped! +[2024-07-05 10:29:12,767][17621] Waiting for process learner_proc0 to stop... +[2024-07-05 10:29:13,428][17621] Waiting for process inference_proc0-0 to join... +[2024-07-05 10:29:13,429][17621] Waiting for process rollout_proc0 to join... +[2024-07-05 10:29:13,430][17621] Waiting for process rollout_proc1 to join... +[2024-07-05 10:29:13,431][17621] Waiting for process rollout_proc2 to join... +[2024-07-05 10:29:13,431][17621] Waiting for process rollout_proc3 to join... +[2024-07-05 10:29:13,432][17621] Waiting for process rollout_proc4 to join... +[2024-07-05 10:29:13,432][17621] Waiting for process rollout_proc5 to join... +[2024-07-05 10:29:13,433][17621] Waiting for process rollout_proc6 to join... +[2024-07-05 10:29:13,433][17621] Waiting for process rollout_proc7 to join... +[2024-07-05 10:29:13,434][17621] Batcher 0 profile tree view: +batching: 0.0149, releasing_batches: 0.0005 +[2024-07-05 10:29:13,434][17621] InferenceWorker_p0-w0 profile tree view: +update_model: 0.0039 wait_policy: 0.0000 - wait_policy_total: 46.3283 -update_model: 15.9000 - weight_update: 0.0009 -one_step: 0.0032 - handle_policy_step: 946.6949 - deserialize: 73.4223, stack: 5.4817, obs_to_device_normalize: 226.5477, forward: 439.8337, send_messages: 47.7830 - prepare_outputs: 120.8101 - to_cpu: 73.1538 -[2024-07-05 13:26:53,039][03445] Learner 0 profile tree view: -misc: 0.0287, prepare_batch: 99.1999 -train: 216.5788 - epoch_init: 0.0189, minibatch_init: 0.0278, losses_postprocess: 1.3147, kl_divergence: 1.4215, after_optimizer: 1.0453 - calculate_losses: 76.4838 - losses_init: 0.0114, forward_head: 3.1345, bptt_initial: 60.0487, tail: 2.7195, advantages_returns: 0.7548, losses: 4.3175 - bptt: 4.6582 - bptt_forward_core: 4.4568 - update: 134.2664 - clip: 4.1150 -[2024-07-05 13:26:53,039][03445] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.4587, enqueue_policy_requests: 30.5053, env_step: 493.4499, overhead: 50.9340, complete_rollouts: 1.2904 -save_policy_outputs: 36.7253 - split_output_tensors: 16.9906 -[2024-07-05 13:26:53,040][03445] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.5011, enqueue_policy_requests: 31.9432, env_step: 506.1817, overhead: 52.0315, complete_rollouts: 1.1451 -save_policy_outputs: 37.6721 - split_output_tensors: 17.6665 -[2024-07-05 13:26:53,040][03445] Loop Runner_EvtLoop terminating... -[2024-07-05 13:26:53,041][03445] Runner profile tree view: -main_loop: 1051.7669 -[2024-07-05 13:26:53,041][03445] Collected {0: 300015616}, FPS: 47542.8 -[2024-07-05 13:27:07,171][03445] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 13:27:07,172][03445] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 13:27:07,172][03445] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 13:27:07,173][03445] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 13:27:07,173][03445] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 13:27:07,174][03445] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 13:27:07,174][03445] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 13:27:07,175][03445] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 13:27:07,175][03445] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 13:27:07,175][03445] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 13:27:07,176][03445] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 13:27:07,176][03445] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 13:27:07,177][03445] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 13:27:07,177][03445] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 13:27:07,177][03445] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 13:27:07,197][03445] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:27:07,199][03445] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:27:07,199][03445] RunningMeanStd input shape: (1,) -[2024-07-05 13:27:07,209][03445] ConvEncoder: input_channels=3 -[2024-07-05 13:27:07,278][03445] Conv encoder output size: 512 -[2024-07-05 13:27:07,279][03445] Policy head output size: 512 -[2024-07-05 13:27:08,979][03445] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039065_300015616.pth... -[2024-07-05 13:27:09,799][03445] Num frames 100... -[2024-07-05 13:27:09,863][03445] Num frames 200... -[2024-07-05 13:27:09,925][03445] Num frames 300... -[2024-07-05 13:27:09,985][03445] Num frames 400... -[2024-07-05 13:27:10,046][03445] Num frames 500... -[2024-07-05 13:27:10,108][03445] Num frames 600... -[2024-07-05 13:27:10,171][03445] Num frames 700... -[2024-07-05 13:27:10,234][03445] Num frames 800... -[2024-07-05 13:27:10,296][03445] Num frames 900... -[2024-07-05 13:27:10,358][03445] Num frames 1000... -[2024-07-05 13:27:10,426][03445] Num frames 1100... -[2024-07-05 13:27:10,489][03445] Num frames 1200... -[2024-07-05 13:27:10,552][03445] Num frames 1300... -[2024-07-05 13:27:10,620][03445] Num frames 1400... -[2024-07-05 13:27:10,681][03445] Num frames 1500... -[2024-07-05 13:27:10,744][03445] Num frames 1600... -[2024-07-05 13:27:10,814][03445] Num frames 1700... -[2024-07-05 13:27:10,878][03445] Num frames 1800... -[2024-07-05 13:27:10,942][03445] Num frames 1900... -[2024-07-05 13:27:11,004][03445] Num frames 2000... -[2024-07-05 13:27:11,067][03445] Num frames 2100... -[2024-07-05 13:27:11,119][03445] Avg episode rewards: #0: 65.999, true rewards: #0: 21.000 -[2024-07-05 13:27:11,120][03445] Avg episode reward: 65.999, avg true_objective: 21.000 -[2024-07-05 13:27:11,183][03445] Num frames 2200... -[2024-07-05 13:27:11,243][03445] Num frames 2300... -[2024-07-05 13:27:11,304][03445] Num frames 2400... -[2024-07-05 13:27:11,366][03445] Num frames 2500... -[2024-07-05 13:27:11,428][03445] Num frames 2600... -[2024-07-05 13:27:11,488][03445] Num frames 2700... -[2024-07-05 13:27:11,549][03445] Num frames 2800... -[2024-07-05 13:27:11,612][03445] Num frames 2900... -[2024-07-05 13:27:11,672][03445] Num frames 3000... -[2024-07-05 13:27:11,733][03445] Num frames 3100... -[2024-07-05 13:27:11,794][03445] Num frames 3200... -[2024-07-05 13:27:11,855][03445] Num frames 3300... -[2024-07-05 13:27:11,917][03445] Num frames 3400... -[2024-07-05 13:27:11,978][03445] Num frames 3500... -[2024-07-05 13:27:12,039][03445] Num frames 3600... -[2024-07-05 13:27:12,100][03445] Num frames 3700... -[2024-07-05 13:27:12,162][03445] Num frames 3800... -[2024-07-05 13:27:12,224][03445] Num frames 3900... -[2024-07-05 13:27:12,286][03445] Num frames 4000... -[2024-07-05 13:27:12,345][03445] Num frames 4100... -[2024-07-05 13:27:12,453][03445] Avg episode rewards: #0: 60.954, true rewards: #0: 20.955 -[2024-07-05 13:27:12,455][03445] Avg episode reward: 60.954, avg true_objective: 20.955 -[2024-07-05 13:27:12,462][03445] Num frames 4200... -[2024-07-05 13:27:12,524][03445] Num frames 4300... -[2024-07-05 13:27:12,585][03445] Num frames 4400... -[2024-07-05 13:27:12,647][03445] Num frames 4500... -[2024-07-05 13:27:12,710][03445] Num frames 4600... -[2024-07-05 13:27:12,774][03445] Num frames 4700... -[2024-07-05 13:27:12,834][03445] Num frames 4800... -[2024-07-05 13:27:12,905][03445] Num frames 4900... -[2024-07-05 13:27:12,968][03445] Num frames 5000... -[2024-07-05 13:27:13,033][03445] Num frames 5100... -[2024-07-05 13:27:13,095][03445] Num frames 5200... -[2024-07-05 13:27:13,156][03445] Num frames 5300... -[2024-07-05 13:27:13,219][03445] Num frames 5400... -[2024-07-05 13:27:13,281][03445] Num frames 5500... -[2024-07-05 13:27:13,345][03445] Num frames 5600... -[2024-07-05 13:27:13,408][03445] Num frames 5700... -[2024-07-05 13:27:13,473][03445] Num frames 5800... -[2024-07-05 13:27:13,536][03445] Num frames 5900... -[2024-07-05 13:27:13,598][03445] Num frames 6000... -[2024-07-05 13:27:13,659][03445] Num frames 6100... -[2024-07-05 13:27:13,719][03445] Num frames 6200... -[2024-07-05 13:27:13,828][03445] Avg episode rewards: #0: 60.302, true rewards: #0: 20.970 -[2024-07-05 13:27:13,829][03445] Avg episode reward: 60.302, avg true_objective: 20.970 -[2024-07-05 13:27:13,837][03445] Num frames 6300... -[2024-07-05 13:27:13,900][03445] Num frames 6400... -[2024-07-05 13:27:13,966][03445] Num frames 6500... -[2024-07-05 13:27:14,032][03445] Num frames 6600... -[2024-07-05 13:27:14,096][03445] Num frames 6700... -[2024-07-05 13:27:14,162][03445] Num frames 6800... -[2024-07-05 13:27:14,231][03445] Num frames 6900... -[2024-07-05 13:27:14,297][03445] Num frames 7000... -[2024-07-05 13:27:14,365][03445] Num frames 7100... -[2024-07-05 13:27:14,431][03445] Num frames 7200... -[2024-07-05 13:27:14,497][03445] Num frames 7300... -[2024-07-05 13:27:14,564][03445] Num frames 7400... -[2024-07-05 13:27:14,633][03445] Num frames 7500... -[2024-07-05 13:27:14,697][03445] Num frames 7600... -[2024-07-05 13:27:14,760][03445] Num frames 7700... -[2024-07-05 13:27:14,825][03445] Num frames 7800... -[2024-07-05 13:27:14,887][03445] Num frames 7900... -[2024-07-05 13:27:14,951][03445] Num frames 8000... -[2024-07-05 13:27:15,014][03445] Num frames 8100... -[2024-07-05 13:27:15,076][03445] Num frames 8200... -[2024-07-05 13:27:15,140][03445] Num frames 8300... -[2024-07-05 13:27:15,250][03445] Avg episode rewards: #0: 60.476, true rewards: #0: 20.978 -[2024-07-05 13:27:15,250][03445] Avg episode reward: 60.476, avg true_objective: 20.978 -[2024-07-05 13:27:15,258][03445] Num frames 8400... -[2024-07-05 13:27:15,321][03445] Num frames 8500... -[2024-07-05 13:27:15,381][03445] Num frames 8600... -[2024-07-05 13:27:15,443][03445] Num frames 8700... -[2024-07-05 13:27:15,504][03445] Num frames 8800... -[2024-07-05 13:27:15,565][03445] Num frames 8900... -[2024-07-05 13:27:15,626][03445] Num frames 9000... -[2024-07-05 13:27:15,686][03445] Num frames 9100... -[2024-07-05 13:27:15,747][03445] Num frames 9200... -[2024-07-05 13:27:15,808][03445] Num frames 9300... -[2024-07-05 13:27:15,868][03445] Num frames 9400... -[2024-07-05 13:27:15,930][03445] Num frames 9500... -[2024-07-05 13:27:15,992][03445] Num frames 9600... -[2024-07-05 13:27:16,063][03445] Num frames 9700... -[2024-07-05 13:27:16,122][03445] Num frames 9800... -[2024-07-05 13:27:16,184][03445] Num frames 9900... -[2024-07-05 13:27:16,249][03445] Num frames 10000... -[2024-07-05 13:27:16,311][03445] Num frames 10100... -[2024-07-05 13:27:16,375][03445] Num frames 10200... -[2024-07-05 13:27:16,438][03445] Num frames 10300... -[2024-07-05 13:27:16,501][03445] Num frames 10400... -[2024-07-05 13:27:16,611][03445] Avg episode rewards: #0: 60.181, true rewards: #0: 20.982 -[2024-07-05 13:27:16,612][03445] Avg episode reward: 60.181, avg true_objective: 20.982 -[2024-07-05 13:27:16,620][03445] Num frames 10500... -[2024-07-05 13:27:16,681][03445] Num frames 10600... -[2024-07-05 13:27:16,743][03445] Num frames 10700... -[2024-07-05 13:27:16,804][03445] Num frames 10800... -[2024-07-05 13:27:16,865][03445] Num frames 10900... -[2024-07-05 13:27:16,927][03445] Num frames 11000... -[2024-07-05 13:27:16,990][03445] Num frames 11100... -[2024-07-05 13:27:17,052][03445] Num frames 11200... -[2024-07-05 13:27:17,113][03445] Num frames 11300... -[2024-07-05 13:27:17,177][03445] Num frames 11400... -[2024-07-05 13:27:17,237][03445] Num frames 11500... -[2024-07-05 13:27:17,299][03445] Num frames 11600... -[2024-07-05 13:27:17,363][03445] Num frames 11700... -[2024-07-05 13:27:17,425][03445] Num frames 11800... -[2024-07-05 13:27:17,487][03445] Num frames 11900... -[2024-07-05 13:27:17,550][03445] Num frames 12000... -[2024-07-05 13:27:17,613][03445] Num frames 12100... -[2024-07-05 13:27:17,676][03445] Num frames 12200... -[2024-07-05 13:27:17,737][03445] Num frames 12300... -[2024-07-05 13:27:17,799][03445] Num frames 12400... -[2024-07-05 13:27:17,860][03445] Num frames 12500... -[2024-07-05 13:27:17,969][03445] Avg episode rewards: #0: 60.650, true rewards: #0: 20.985 -[2024-07-05 13:27:17,970][03445] Avg episode reward: 60.650, avg true_objective: 20.985 -[2024-07-05 13:27:17,977][03445] Num frames 12600... -[2024-07-05 13:27:18,038][03445] Num frames 12700... -[2024-07-05 13:27:18,100][03445] Num frames 12800... -[2024-07-05 13:27:18,161][03445] Num frames 12900... -[2024-07-05 13:27:18,220][03445] Num frames 13000... -[2024-07-05 13:27:18,282][03445] Num frames 13100... -[2024-07-05 13:27:18,342][03445] Num frames 13200... -[2024-07-05 13:27:18,402][03445] Num frames 13300... -[2024-07-05 13:27:18,461][03445] Num frames 13400... -[2024-07-05 13:27:18,525][03445] Num frames 13500... -[2024-07-05 13:27:18,586][03445] Num frames 13600... -[2024-07-05 13:27:18,646][03445] Num frames 13700... -[2024-07-05 13:27:18,705][03445] Num frames 13800... -[2024-07-05 13:27:18,765][03445] Num frames 13900... -[2024-07-05 13:27:18,824][03445] Num frames 14000... -[2024-07-05 13:27:18,885][03445] Num frames 14100... -[2024-07-05 13:27:18,949][03445] Num frames 14200... -[2024-07-05 13:27:19,008][03445] Num frames 14300... -[2024-07-05 13:27:19,067][03445] Num frames 14400... -[2024-07-05 13:27:19,140][03445] Num frames 14500... -[2024-07-05 13:27:19,201][03445] Num frames 14600... -[2024-07-05 13:27:19,308][03445] Avg episode rewards: #0: 60.557, true rewards: #0: 20.987 -[2024-07-05 13:27:19,310][03445] Avg episode reward: 60.557, avg true_objective: 20.987 -[2024-07-05 13:27:19,318][03445] Num frames 14700... -[2024-07-05 13:27:19,378][03445] Num frames 14800... -[2024-07-05 13:27:19,437][03445] Num frames 14900... -[2024-07-05 13:27:19,496][03445] Num frames 15000... -[2024-07-05 13:27:19,559][03445] Num frames 15100... -[2024-07-05 13:27:19,619][03445] Num frames 15200... -[2024-07-05 13:27:19,678][03445] Num frames 15300... -[2024-07-05 13:27:19,766][03445] Avg episode rewards: #0: 54.948, true rewards: #0: 19.199 -[2024-07-05 13:27:19,767][03445] Avg episode reward: 54.948, avg true_objective: 19.199 -[2024-07-05 13:27:19,796][03445] Num frames 15400... -[2024-07-05 13:27:19,856][03445] Num frames 15500... -[2024-07-05 13:27:19,916][03445] Num frames 15600... -[2024-07-05 13:27:19,977][03445] Num frames 15700... -[2024-07-05 13:27:20,039][03445] Num frames 15800... -[2024-07-05 13:27:20,099][03445] Num frames 15900... -[2024-07-05 13:27:20,162][03445] Num frames 16000... -[2024-07-05 13:27:20,224][03445] Num frames 16100... -[2024-07-05 13:27:20,284][03445] Num frames 16200... -[2024-07-05 13:27:20,346][03445] Num frames 16300... -[2024-07-05 13:27:20,451][03445] Avg episode rewards: #0: 51.536, true rewards: #0: 18.203 -[2024-07-05 13:27:20,453][03445] Avg episode reward: 51.536, avg true_objective: 18.203 -[2024-07-05 13:27:20,464][03445] Num frames 16400... -[2024-07-05 13:27:20,530][03445] Num frames 16500... -[2024-07-05 13:27:20,591][03445] Num frames 16600... -[2024-07-05 13:27:20,654][03445] Num frames 16700... -[2024-07-05 13:27:20,714][03445] Num frames 16800... -[2024-07-05 13:27:20,776][03445] Num frames 16900... -[2024-07-05 13:27:20,842][03445] Num frames 17000... -[2024-07-05 13:27:20,908][03445] Num frames 17100... -[2024-07-05 13:27:20,974][03445] Num frames 17200... -[2024-07-05 13:27:21,037][03445] Num frames 17300... -[2024-07-05 13:27:21,099][03445] Num frames 17400... -[2024-07-05 13:27:21,161][03445] Num frames 17500... -[2024-07-05 13:27:21,223][03445] Num frames 17600... -[2024-07-05 13:27:21,312][03445] Avg episode rewards: #0: 49.852, true rewards: #0: 17.653 -[2024-07-05 13:27:21,314][03445] Avg episode reward: 49.852, avg true_objective: 17.653 -[2024-07-05 13:27:39,424][03445] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 13:29:48,824][03445] Environment doom_basic already registered, overwriting... -[2024-07-05 13:29:48,825][03445] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 13:29:48,826][03445] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 13:29:48,826][03445] Environment doom_dm already registered, overwriting... -[2024-07-05 13:29:48,827][03445] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 13:29:48,827][03445] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 13:29:48,827][03445] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 13:29:48,828][03445] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 13:29:48,828][03445] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 13:29:48,829][03445] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 13:29:48,829][03445] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 13:29:48,830][03445] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 13:29:48,830][03445] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 13:29:48,831][03445] Environment doom_battle already registered, overwriting... -[2024-07-05 13:29:48,831][03445] Environment doom_battle2 already registered, overwriting... -[2024-07-05 13:29:48,832][03445] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 13:29:48,832][03445] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 13:29:48,832][03445] Environment doom_duel already registered, overwriting... -[2024-07-05 13:29:48,833][03445] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 13:29:48,833][03445] Environment doom_benchmark already registered, overwriting... -[2024-07-05 13:29:48,834][03445] register_encoder_factory: -[2024-07-05 13:29:48,840][03445] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 13:29:48,841][03445] Overriding arg 'train_for_env_steps' with value 350000000 passed from command line -[2024-07-05 13:29:48,846][03445] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 13:29:48,847][03445] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 13:29:48,847][03445] Weights and Biases integration disabled -[2024-07-05 13:29:48,848][03445] Environment var CUDA_VISIBLE_DEVICES is 0 + wait_policy_total: 1.2594 +one_step: 0.0027 + handle_policy_step: 0.9892 + deserialize: 0.0202, stack: 0.0021, obs_to_device_normalize: 0.1560, forward: 0.7558, send_messages: 0.0164 + prepare_outputs: 0.0250 + to_cpu: 0.0102 +[2024-07-05 10:29:13,434][17621] Learner 0 profile tree view: +misc: 0.0000, prepare_batch: 0.8941 +train: 3.6211 + epoch_init: 0.0000, minibatch_init: 0.0000, losses_postprocess: 0.0002, kl_divergence: 0.0071, after_optimizer: 0.0451 + calculate_losses: 1.1230 + losses_init: 0.0000, forward_head: 0.7392, bptt_initial: 0.2961, tail: 0.0354, advantages_returns: 0.0008, losses: 0.0406 + bptt: 0.0106 + bptt_forward_core: 0.0105 + update: 2.4448 + clip: 0.0529 +[2024-07-05 10:29:13,435][17621] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.0004, enqueue_policy_requests: 0.0150, env_step: 0.1536, overhead: 0.0144, complete_rollouts: 0.0003 +save_policy_outputs: 0.0149 + split_output_tensors: 0.0069 +[2024-07-05 10:29:13,435][17621] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.0004, enqueue_policy_requests: 0.0194, env_step: 0.1854, overhead: 0.0207, complete_rollouts: 0.0004 +save_policy_outputs: 0.0159 + split_output_tensors: 0.0077 +[2024-07-05 10:29:13,436][17621] Loop Runner_EvtLoop terminating... +[2024-07-05 10:29:13,436][17621] Runner profile tree view: +main_loop: 12.2816 +[2024-07-05 10:29:13,437][17621] Collected {0: 5013504}, FPS: 667.0 +[2024-07-05 10:30:24,967][17621] Environment doom_basic already registered, overwriting... +[2024-07-05 10:30:24,969][17621] Environment doom_two_colors_easy already registered, overwriting... +[2024-07-05 10:30:24,970][17621] Environment doom_two_colors_hard already registered, overwriting... +[2024-07-05 10:30:24,970][17621] Environment doom_dm already registered, overwriting... +[2024-07-05 10:30:24,970][17621] Environment doom_dwango5 already registered, overwriting... +[2024-07-05 10:30:24,971][17621] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-07-05 10:30:24,971][17621] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-07-05 10:30:24,972][17621] Environment doom_my_way_home already registered, overwriting... +[2024-07-05 10:30:24,972][17621] Environment doom_deadly_corridor already registered, overwriting... +[2024-07-05 10:30:24,973][17621] Environment doom_defend_the_center already registered, overwriting... +[2024-07-05 10:30:24,973][17621] Environment doom_defend_the_line already registered, overwriting... +[2024-07-05 10:30:24,974][17621] Environment doom_health_gathering already registered, overwriting... +[2024-07-05 10:30:24,974][17621] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-07-05 10:30:24,974][17621] Environment doom_battle already registered, overwriting... +[2024-07-05 10:30:24,975][17621] Environment doom_battle2 already registered, overwriting... +[2024-07-05 10:30:24,975][17621] Environment doom_duel_bots already registered, overwriting... +[2024-07-05 10:30:24,975][17621] Environment doom_deathmatch_bots already registered, overwriting... +[2024-07-05 10:30:24,975][17621] Environment doom_duel already registered, overwriting... +[2024-07-05 10:30:24,976][17621] Environment doom_deathmatch_full already registered, overwriting... +[2024-07-05 10:30:24,976][17621] Environment doom_benchmark already registered, overwriting... +[2024-07-05 10:30:24,976][17621] register_encoder_factory: +[2024-07-05 10:30:24,983][17621] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 10:30:24,984][17621] Overriding arg 'train_for_env_steps' with value 10000000 passed from command line +[2024-07-05 10:30:24,989][17621] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet already exists! +[2024-07-05 10:30:24,990][17621] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet... +[2024-07-05 10:30:24,991][17621] Weights and Biases integration disabled +[2024-07-05 10:30:24,993][17621] Environment var CUDA_VISIBLE_DEVICES is 0 -[2024-07-05 13:29:51,342][03445] Starting experiment with the following configuration: +[2024-07-05 10:30:28,261][17621] Starting experiment with the following configuration: help=False algo=APPO env=doom_health_gathering_supreme -experiment=default_experiment +experiment=conv_resnet train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir restart_behavior=resume device=gpu @@ -13719,9 +2542,9 @@ num_batches_to_accumulate=2 worker_num_splits=2 policy_workers_per_policy=1 max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 num_batches_per_epoch=1 num_epochs=1 rollout=32 @@ -13769,7 +2592,7 @@ stats_avg=100 summaries_use_frameskip=True heartbeat_interval=20 heartbeat_reporting_interval=600 -train_for_env_steps=350000000 +train_for_env_steps=10000000 train_for_seconds=10000000000 save_every_sec=120 keep_checkpoints=2 @@ -13780,7 +2603,7 @@ save_best_metric=reward save_best_after=100000 benchmark=False encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple +encoder_conv_architecture=resnet_impala encoder_conv_mlp_layers=[512] use_rnn=True rnn_size=512 @@ -13829,74 +2652,57 @@ res_h=72 wide_aspect_ratio=False eval_env_frameskip=1 fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} +command_line=--env=doom_health_gathering_supreme --experiment=conv_resnet --seed=200 --num_workers=8 --num_envs_per_worker=4 --batch_size=1024 --encoder_conv_architecture=resnet_impala --train_for_env_steps=5000000 +cli_args={'env': 'doom_health_gathering_supreme', 'experiment': 'conv_resnet', 'seed': 200, 'num_workers': 8, 'num_envs_per_worker': 4, 'batch_size': 1024, 'train_for_env_steps': 5000000, 'encoder_conv_architecture': 'resnet_impala'} git_hash=unknown git_repo_name=not a git repository -[2024-07-05 13:29:51,343][03445] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 13:29:51,344][03445] Rollout worker 0 uses device cpu -[2024-07-05 13:29:51,345][03445] Rollout worker 1 uses device cpu -[2024-07-05 13:29:51,345][03445] Rollout worker 2 uses device cpu -[2024-07-05 13:29:51,345][03445] Rollout worker 3 uses device cpu -[2024-07-05 13:29:51,346][03445] Rollout worker 4 uses device cpu -[2024-07-05 13:29:51,346][03445] Rollout worker 5 uses device cpu -[2024-07-05 13:29:51,346][03445] Rollout worker 6 uses device cpu -[2024-07-05 13:29:51,347][03445] Rollout worker 7 uses device cpu -[2024-07-05 13:29:51,347][03445] Rollout worker 8 uses device cpu -[2024-07-05 13:29:51,347][03445] Rollout worker 9 uses device cpu -[2024-07-05 13:29:51,348][03445] Rollout worker 10 uses device cpu -[2024-07-05 13:29:51,348][03445] Rollout worker 11 uses device cpu -[2024-07-05 13:29:51,348][03445] Rollout worker 12 uses device cpu -[2024-07-05 13:29:51,348][03445] Rollout worker 13 uses device cpu -[2024-07-05 13:29:51,349][03445] Rollout worker 14 uses device cpu -[2024-07-05 13:29:51,349][03445] Rollout worker 15 uses device cpu -[2024-07-05 13:29:51,432][03445] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:29:51,432][03445] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 13:29:51,511][03445] Starting all processes... -[2024-07-05 13:29:51,512][03445] Starting process learner_proc0 -[2024-07-05 13:29:51,561][03445] Starting all processes... -[2024-07-05 13:29:51,565][03445] Starting process inference_proc0-0 -[2024-07-05 13:29:51,565][03445] Starting process rollout_proc0 -[2024-07-05 13:29:51,566][03445] Starting process rollout_proc1 -[2024-07-05 13:29:51,566][03445] Starting process rollout_proc2 -[2024-07-05 13:29:51,567][03445] Starting process rollout_proc3 -[2024-07-05 13:29:51,567][03445] Starting process rollout_proc4 -[2024-07-05 13:29:51,567][03445] Starting process rollout_proc5 -[2024-07-05 13:29:51,568][03445] Starting process rollout_proc6 -[2024-07-05 13:29:51,570][03445] Starting process rollout_proc7 -[2024-07-05 13:29:51,570][03445] Starting process rollout_proc8 -[2024-07-05 13:29:51,570][03445] Starting process rollout_proc9 -[2024-07-05 13:29:51,570][03445] Starting process rollout_proc10 -[2024-07-05 13:29:51,572][03445] Starting process rollout_proc11 -[2024-07-05 13:29:51,572][03445] Starting process rollout_proc12 -[2024-07-05 13:29:51,574][03445] Starting process rollout_proc13 -[2024-07-05 13:29:51,575][03445] Starting process rollout_proc14 -[2024-07-05 13:29:51,594][03445] Starting process rollout_proc15 -[2024-07-05 13:29:55,403][06966] Worker 0 uses CPU cores [0] -[2024-07-05 13:29:55,535][06945] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:29:55,535][06945] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 13:29:55,603][06996] Worker 15 uses CPU cores [15] -[2024-07-05 13:29:55,627][06967] Worker 2 uses CPU cores [2] -[2024-07-05 13:29:55,643][06975] Worker 9 uses CPU cores [9] -[2024-07-05 13:29:55,645][06945] Num visible devices: 1 -[2024-07-05 13:29:55,647][06991] Worker 11 uses CPU cores [11] -[2024-07-05 13:29:55,681][06965] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:29:55,681][06965] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 13:29:55,688][06945] Setting fixed seed 200 -[2024-07-05 13:29:55,699][06945] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:29:55,699][06945] Initializing actor-critic model on device cuda:0 -[2024-07-05 13:29:55,700][06945] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:29:55,701][06945] RunningMeanStd input shape: (1,) -[2024-07-05 13:29:55,713][06945] ConvEncoder: input_channels=3 -[2024-07-05 13:29:55,731][06970] Worker 3 uses CPU cores [3] -[2024-07-05 13:29:55,766][06965] Num visible devices: 1 -[2024-07-05 13:29:55,767][06993] Worker 13 uses CPU cores [13] -[2024-07-05 13:29:55,771][06968] Worker 1 uses CPU cores [1] -[2024-07-05 13:29:55,773][06995] Worker 12 uses CPU cores [12] -[2024-07-05 13:29:55,825][06945] Conv encoder output size: 512 -[2024-07-05 13:29:55,825][06945] Policy head output size: 512 -[2024-07-05 13:29:55,838][06945] Created Actor Critic model with architecture: -[2024-07-05 13:29:55,838][06945] ActorCriticSharedWeights( +[2024-07-05 10:30:28,262][17621] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json... +[2024-07-05 10:30:28,263][17621] Rollout worker 0 uses device cpu +[2024-07-05 10:30:28,264][17621] Rollout worker 1 uses device cpu +[2024-07-05 10:30:28,264][17621] Rollout worker 2 uses device cpu +[2024-07-05 10:30:28,265][17621] Rollout worker 3 uses device cpu +[2024-07-05 10:30:28,265][17621] Rollout worker 4 uses device cpu +[2024-07-05 10:30:28,266][17621] Rollout worker 5 uses device cpu +[2024-07-05 10:30:28,266][17621] Rollout worker 6 uses device cpu +[2024-07-05 10:30:28,266][17621] Rollout worker 7 uses device cpu +[2024-07-05 10:30:28,301][17621] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:30:28,302][17621] InferenceWorker_p0-w0: min num requests: 2 +[2024-07-05 10:30:28,327][17621] Starting all processes... +[2024-07-05 10:30:28,327][17621] Starting process learner_proc0 +[2024-07-05 10:30:28,377][17621] Starting all processes... +[2024-07-05 10:30:28,380][17621] Starting process inference_proc0-0 +[2024-07-05 10:30:28,380][17621] Starting process rollout_proc0 +[2024-07-05 10:30:28,381][17621] Starting process rollout_proc1 +[2024-07-05 10:30:28,382][17621] Starting process rollout_proc2 +[2024-07-05 10:30:28,382][17621] Starting process rollout_proc3 +[2024-07-05 10:30:28,388][17621] Starting process rollout_proc4 +[2024-07-05 10:30:28,388][17621] Starting process rollout_proc5 +[2024-07-05 10:30:28,388][17621] Starting process rollout_proc6 +[2024-07-05 10:30:28,390][17621] Starting process rollout_proc7 +[2024-07-05 10:30:31,042][19516] Worker 2 uses CPU cores [4, 5] +[2024-07-05 10:30:31,097][19499] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:30:31,097][19499] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-07-05 10:30:31,131][19518] Worker 5 uses CPU cores [10, 11] +[2024-07-05 10:30:31,160][19517] Worker 4 uses CPU cores [8, 9] +[2024-07-05 10:30:31,167][19513] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:30:31,168][19513] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-07-05 10:30:31,296][19520] Worker 7 uses CPU cores [14, 15] +[2024-07-05 10:30:31,423][19514] Worker 1 uses CPU cores [2, 3] +[2024-07-05 10:30:31,489][19512] Worker 0 uses CPU cores [0, 1] +[2024-07-05 10:30:31,570][19519] Worker 6 uses CPU cores [12, 13] +[2024-07-05 10:30:31,686][19515] Worker 3 uses CPU cores [6, 7] +[2024-07-05 10:30:32,719][19513] Num visible devices: 1 +[2024-07-05 10:30:32,719][19499] Num visible devices: 1 +[2024-07-05 10:30:32,744][19499] Setting fixed seed 200 +[2024-07-05 10:30:32,745][19499] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:30:32,745][19499] Initializing actor-critic model on device cuda:0 +[2024-07-05 10:30:32,746][19499] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:30:32,746][19499] RunningMeanStd input shape: (1,) +[2024-07-05 10:30:32,753][19499] Num input channels: 3 +[2024-07-05 10:30:32,764][19499] Convolutional layer output size: 4608 +[2024-07-05 10:30:32,775][19499] Policy head output size: 512 +[2024-07-05 10:30:32,896][19499] Created Actor Critic model with architecture: +[2024-07-05 10:30:32,897][19499] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -13906,1720 +2712,67 @@ git_repo_name=not a git repository ) (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (basic_encoder): ResnetEncoder( + (conv_head): Sequential( + (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (2): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (3): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - ) - ) - ) - (core): ModelCoreRNN( - (core): GRU(512, 512) - ) - (decoder): MlpDecoder( - (mlp): Identity() - ) - (critic_linear): Linear(in_features=512, out_features=1, bias=True) - (action_parameterization): ActionParameterizationDefault( - (distribution_linear): Linear(in_features=512, out_features=5, bias=True) - ) -) -[2024-07-05 13:29:55,843][06994] Worker 14 uses CPU cores [14] -[2024-07-05 13:29:55,862][06973] Worker 7 uses CPU cores [7] -[2024-07-05 13:29:55,888][06972] Worker 6 uses CPU cores [6] -[2024-07-05 13:29:55,893][06971] Worker 5 uses CPU cores [5] -[2024-07-05 13:29:55,895][06969] Worker 4 uses CPU cores [4] -[2024-07-05 13:29:55,937][06945] Using optimizer -[2024-07-05 13:29:56,018][06992] Worker 10 uses CPU cores [10] -[2024-07-05 13:29:56,135][06974] Worker 8 uses CPU cores [8] -[2024-07-05 13:29:56,553][06945] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039065_300015616.pth... -[2024-07-05 13:29:56,571][06945] Loading model from checkpoint -[2024-07-05 13:29:56,572][06945] Loaded experiment state at self.train_step=39065, self.env_steps=300015616 -[2024-07-05 13:29:56,572][06945] Initialized policy 0 weights for model version 39065 -[2024-07-05 13:29:56,573][06945] LearnerWorker_p0 finished initialization! -[2024-07-05 13:29:56,573][06945] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:29:56,635][06965] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:29:56,636][06965] RunningMeanStd input shape: (1,) -[2024-07-05 13:29:56,643][06965] ConvEncoder: input_channels=3 -[2024-07-05 13:29:56,696][06965] Conv encoder output size: 512 -[2024-07-05 13:29:56,696][06965] Policy head output size: 512 -[2024-07-05 13:29:56,728][03445] Inference worker 0-0 is ready! -[2024-07-05 13:29:56,729][03445] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 13:29:56,779][06968] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,779][06995] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,779][06967] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,781][06971] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,782][06972] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,782][06973] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,783][06991] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,783][06974] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,783][06970] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,784][06969] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,791][06994] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,795][06966] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,796][06992] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,796][06975] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,796][06993] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:56,807][06996] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:29:57,356][06973] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,387][06995] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,390][06991] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,390][06971] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,391][06967] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,392][06968] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,392][06972] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,393][06966] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,512][06973] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,539][06991] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,584][06993] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,597][06994] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,605][06996] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,646][06995] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,694][06968] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,695][06967] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,747][06994] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,788][06975] Decorrelating experience for 0 frames... -[2024-07-05 13:29:57,790][06993] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,846][06966] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,849][06995] Decorrelating experience for 64 frames... -[2024-07-05 13:29:57,853][06967] Decorrelating experience for 64 frames... -[2024-07-05 13:29:57,906][06972] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,946][06975] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,959][06971] Decorrelating experience for 32 frames... -[2024-07-05 13:29:57,986][06992] Decorrelating experience for 0 frames... -[2024-07-05 13:29:58,013][06994] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,062][06991] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,072][06995] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,117][06993] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,141][06972] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,164][06968] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,198][06992] Decorrelating experience for 32 frames... -[2024-07-05 13:29:58,267][06975] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,284][06991] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,289][06967] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,291][06973] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,332][06971] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,332][06995] Decorrelating experience for 128 frames... -[2024-07-05 13:29:58,372][06966] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,386][06972] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,509][06975] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,516][06974] Decorrelating experience for 0 frames... -[2024-07-05 13:29:58,529][06992] Decorrelating experience for 64 frames... -[2024-07-05 13:29:58,531][06973] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,547][06967] Decorrelating experience for 128 frames... -[2024-07-05 13:29:58,570][06966] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,604][06993] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,643][06995] Decorrelating experience for 160 frames... -[2024-07-05 13:29:58,729][06991] Decorrelating experience for 128 frames... -[2024-07-05 13:29:58,737][06968] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,739][06972] Decorrelating experience for 128 frames... -[2024-07-05 13:29:58,751][06970] Decorrelating experience for 0 frames... -[2024-07-05 13:29:58,803][06967] Decorrelating experience for 160 frames... -[2024-07-05 13:29:58,811][06992] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,849][03445] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 300015616. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 13:29:58,877][06993] Decorrelating experience for 128 frames... -[2024-07-05 13:29:58,912][06975] Decorrelating experience for 128 frames... -[2024-07-05 13:29:58,946][06969] Decorrelating experience for 0 frames... -[2024-07-05 13:29:58,971][06971] Decorrelating experience for 96 frames... -[2024-07-05 13:29:58,986][06968] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,029][06967] Decorrelating experience for 192 frames... -[2024-07-05 13:29:59,097][06991] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,102][06994] Decorrelating experience for 96 frames... -[2024-07-05 13:29:59,105][06992] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,163][06973] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,165][06970] Decorrelating experience for 32 frames... -[2024-07-05 13:29:59,173][06969] Decorrelating experience for 32 frames... -[2024-07-05 13:29:59,235][06993] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,261][06966] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,321][06967] Decorrelating experience for 224 frames... -[2024-07-05 13:29:59,331][06971] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,331][06974] Decorrelating experience for 32 frames... -[2024-07-05 13:29:59,368][06996] Decorrelating experience for 32 frames... -[2024-07-05 13:29:59,374][06973] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,396][06970] Decorrelating experience for 64 frames... -[2024-07-05 13:29:59,435][06994] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,446][06993] Decorrelating experience for 192 frames... -[2024-07-05 13:29:59,468][06966] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,550][06972] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,558][06969] Decorrelating experience for 64 frames... -[2024-07-05 13:29:59,565][06975] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,604][06971] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,618][06974] Decorrelating experience for 64 frames... -[2024-07-05 13:29:59,638][06970] Decorrelating experience for 96 frames... -[2024-07-05 13:29:59,642][06994] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,678][06973] Decorrelating experience for 192 frames... -[2024-07-05 13:29:59,738][06966] Decorrelating experience for 192 frames... -[2024-07-05 13:29:59,768][06995] Decorrelating experience for 192 frames... -[2024-07-05 13:29:59,816][06974] Decorrelating experience for 96 frames... -[2024-07-05 13:29:59,824][06996] Decorrelating experience for 64 frames... -[2024-07-05 13:29:59,849][06972] Decorrelating experience for 192 frames... -[2024-07-05 13:29:59,878][06993] Decorrelating experience for 224 frames... -[2024-07-05 13:29:59,900][06970] Decorrelating experience for 128 frames... -[2024-07-05 13:29:59,925][06969] Decorrelating experience for 96 frames... -[2024-07-05 13:29:59,955][06968] Decorrelating experience for 160 frames... -[2024-07-05 13:29:59,996][06973] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,058][06991] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,070][06996] Decorrelating experience for 96 frames... -[2024-07-05 13:30:00,079][06971] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,089][06995] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,115][06975] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,151][06972] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,211][06968] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,239][06970] Decorrelating experience for 160 frames... -[2024-07-05 13:30:00,281][06994] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,282][06969] Decorrelating experience for 128 frames... -[2024-07-05 13:30:00,332][06974] Decorrelating experience for 128 frames... -[2024-07-05 13:30:00,351][06966] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,380][06971] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,480][06996] Decorrelating experience for 128 frames... -[2024-07-05 13:30:00,486][06991] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,513][06968] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,521][06975] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,600][06994] Decorrelating experience for 224 frames... -[2024-07-05 13:30:00,608][06992] Decorrelating experience for 160 frames... -[2024-07-05 13:30:00,706][06974] Decorrelating experience for 160 frames... -[2024-07-05 13:30:00,727][06969] Decorrelating experience for 160 frames... -[2024-07-05 13:30:00,745][06996] Decorrelating experience for 160 frames... -[2024-07-05 13:30:00,754][06970] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,824][06992] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,961][06974] Decorrelating experience for 192 frames... -[2024-07-05 13:30:00,990][06969] Decorrelating experience for 192 frames... -[2024-07-05 13:30:01,011][06996] Decorrelating experience for 192 frames... -[2024-07-05 13:30:01,078][06945] Signal inference workers to stop experience collection... -[2024-07-05 13:30:01,084][06965] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 13:30:01,124][06992] Decorrelating experience for 224 frames... -[2024-07-05 13:30:01,190][06970] Decorrelating experience for 224 frames... -[2024-07-05 13:30:01,228][06974] Decorrelating experience for 224 frames... -[2024-07-05 13:30:01,241][06969] Decorrelating experience for 224 frames... -[2024-07-05 13:30:01,397][06996] Decorrelating experience for 224 frames... -[2024-07-05 13:30:02,411][06945] Signal inference workers to resume experience collection... -[2024-07-05 13:30:02,411][06965] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 13:30:03,849][03445] Fps is (10 sec: 13107.4, 60 sec: 13107.4, 300 sec: 13107.4). Total num frames: 300081152. Throughput: 0: 1491.2. Samples: 7456. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:30:03,849][03445] Avg episode reward: [(0, '3.892')] -[2024-07-05 13:30:04,174][06965] Updated weights for policy 0, policy_version 39075 (0.0094) -[2024-07-05 13:30:05,915][06965] Updated weights for policy 0, policy_version 39085 (0.0008) -[2024-07-05 13:30:07,638][06965] Updated weights for policy 0, policy_version 39095 (0.0008) -[2024-07-05 13:30:08,849][03445] Fps is (10 sec: 30310.5, 60 sec: 30310.5, 300 sec: 30310.5). Total num frames: 300318720. Throughput: 0: 7775.2. Samples: 77752. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:30:08,850][03445] Avg episode reward: [(0, '54.255')] -[2024-07-05 13:30:09,281][06965] Updated weights for policy 0, policy_version 39105 (0.0008) -[2024-07-05 13:30:10,957][06965] Updated weights for policy 0, policy_version 39115 (0.0007) -[2024-07-05 13:30:11,425][03445] Heartbeat connected on Batcher_0 -[2024-07-05 13:30:11,437][03445] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 13:30:11,442][03445] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 13:30:11,443][03445] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 13:30:11,445][03445] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 13:30:11,447][03445] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 13:30:11,448][03445] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 13:30:11,458][03445] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 13:30:11,462][03445] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 13:30:11,465][03445] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 13:30:11,471][03445] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 13:30:11,494][03445] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 13:30:11,498][03445] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 13:30:11,505][03445] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 13:30:11,507][03445] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 13:30:11,508][03445] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 13:30:11,511][03445] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 13:30:11,513][03445] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 13:30:11,514][03445] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 13:30:12,668][06965] Updated weights for policy 0, policy_version 39125 (0.0008) -[2024-07-05 13:30:13,849][03445] Fps is (10 sec: 47513.3, 60 sec: 36044.8, 300 sec: 36044.8). Total num frames: 300556288. Throughput: 0: 7612.8. Samples: 114192. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:30:13,850][03445] Avg episode reward: [(0, '55.362')] -[2024-07-05 13:30:14,305][06965] Updated weights for policy 0, policy_version 39135 (0.0009) -[2024-07-05 13:30:16,037][06965] Updated weights for policy 0, policy_version 39145 (0.0008) -[2024-07-05 13:30:17,699][06965] Updated weights for policy 0, policy_version 39155 (0.0007) -[2024-07-05 13:30:18,848][03445] Fps is (10 sec: 48333.2, 60 sec: 39321.8, 300 sec: 39321.8). Total num frames: 300802048. Throughput: 0: 9367.9. Samples: 187356. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:30:18,849][03445] Avg episode reward: [(0, '52.986')] -[2024-07-05 13:30:19,374][06965] Updated weights for policy 0, policy_version 39165 (0.0009) -[2024-07-05 13:30:21,083][06965] Updated weights for policy 0, policy_version 39175 (0.0010) -[2024-07-05 13:30:22,761][06965] Updated weights for policy 0, policy_version 39185 (0.0008) -[2024-07-05 13:30:23,849][03445] Fps is (10 sec: 49151.9, 60 sec: 41287.7, 300 sec: 41287.7). Total num frames: 301047808. Throughput: 0: 10394.7. Samples: 259868. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:30:23,849][03445] Avg episode reward: [(0, '53.771')] -[2024-07-05 13:30:24,470][06965] Updated weights for policy 0, policy_version 39195 (0.0009) -[2024-07-05 13:30:26,154][06965] Updated weights for policy 0, policy_version 39205 (0.0007) -[2024-07-05 13:30:27,819][06965] Updated weights for policy 0, policy_version 39215 (0.0008) -[2024-07-05 13:30:28,849][03445] Fps is (10 sec: 49151.6, 60 sec: 42598.4, 300 sec: 42598.4). Total num frames: 301293568. Throughput: 0: 9880.9. Samples: 296428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:30:28,849][03445] Avg episode reward: [(0, '53.394')] -[2024-07-05 13:30:29,515][06965] Updated weights for policy 0, policy_version 39225 (0.0008) -[2024-07-05 13:30:31,197][06965] Updated weights for policy 0, policy_version 39235 (0.0008) -[2024-07-05 13:30:32,888][06965] Updated weights for policy 0, policy_version 39245 (0.0008) -[2024-07-05 13:30:33,849][03445] Fps is (10 sec: 48333.3, 60 sec: 43300.7, 300 sec: 43300.7). Total num frames: 301531136. Throughput: 0: 10546.4. Samples: 369124. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:30:33,850][03445] Avg episode reward: [(0, '53.424')] -[2024-07-05 13:30:34,596][06965] Updated weights for policy 0, policy_version 39255 (0.0007) -[2024-07-05 13:30:36,290][06965] Updated weights for policy 0, policy_version 39265 (0.0008) -[2024-07-05 13:30:37,967][06965] Updated weights for policy 0, policy_version 39275 (0.0008) -[2024-07-05 13:30:38,849][03445] Fps is (10 sec: 48332.9, 60 sec: 44032.0, 300 sec: 44032.0). Total num frames: 301776896. Throughput: 0: 11047.7. Samples: 441908. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:30:38,850][03445] Avg episode reward: [(0, '53.530')] -[2024-07-05 13:30:39,692][06965] Updated weights for policy 0, policy_version 39285 (0.0008) -[2024-07-05 13:30:41,376][06965] Updated weights for policy 0, policy_version 39295 (0.0008) -[2024-07-05 13:30:43,108][06965] Updated weights for policy 0, policy_version 39305 (0.0009) -[2024-07-05 13:30:43,849][03445] Fps is (10 sec: 48332.5, 60 sec: 44418.9, 300 sec: 44418.9). Total num frames: 302014464. Throughput: 0: 10619.5. Samples: 477876. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:30:43,850][03445] Avg episode reward: [(0, '53.857')] -[2024-07-05 13:30:44,815][06965] Updated weights for policy 0, policy_version 39315 (0.0007) -[2024-07-05 13:30:46,503][06965] Updated weights for policy 0, policy_version 39325 (0.0008) -[2024-07-05 13:30:48,165][06965] Updated weights for policy 0, policy_version 39335 (0.0008) -[2024-07-05 13:30:48,849][03445] Fps is (10 sec: 48332.8, 60 sec: 44892.2, 300 sec: 44892.2). Total num frames: 302260224. Throughput: 0: 12065.3. Samples: 550396. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:30:48,850][03445] Avg episode reward: [(0, '53.820')] -[2024-07-05 13:30:49,917][06965] Updated weights for policy 0, policy_version 39345 (0.0012) -[2024-07-05 13:30:51,594][06965] Updated weights for policy 0, policy_version 39355 (0.0008) -[2024-07-05 13:30:53,297][06965] Updated weights for policy 0, policy_version 39365 (0.0007) -[2024-07-05 13:30:53,848][03445] Fps is (10 sec: 48333.1, 60 sec: 45130.6, 300 sec: 45130.6). Total num frames: 302497792. Throughput: 0: 12099.5. Samples: 622228. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:30:53,850][03445] Avg episode reward: [(0, '52.000')] -[2024-07-05 13:30:55,026][06965] Updated weights for policy 0, policy_version 39375 (0.0012) -[2024-07-05 13:30:56,715][06965] Updated weights for policy 0, policy_version 39385 (0.0007) -[2024-07-05 13:30:58,518][06965] Updated weights for policy 0, policy_version 39395 (0.0008) -[2024-07-05 13:30:58,849][03445] Fps is (10 sec: 46694.4, 60 sec: 45192.6, 300 sec: 45192.6). Total num frames: 302727168. Throughput: 0: 12079.3. Samples: 657760. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:30:58,849][03445] Avg episode reward: [(0, '53.588')] -[2024-07-05 13:31:00,230][06965] Updated weights for policy 0, policy_version 39405 (0.0011) -[2024-07-05 13:31:01,943][06965] Updated weights for policy 0, policy_version 39415 (0.0008) -[2024-07-05 13:31:03,635][06965] Updated weights for policy 0, policy_version 39425 (0.0008) -[2024-07-05 13:31:03,849][03445] Fps is (10 sec: 47513.3, 60 sec: 48196.2, 300 sec: 45497.1). Total num frames: 302972928. Throughput: 0: 12044.0. Samples: 729336. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:31:03,850][03445] Avg episode reward: [(0, '54.131')] -[2024-07-05 13:31:05,304][06965] Updated weights for policy 0, policy_version 39435 (0.0008) -[2024-07-05 13:31:06,966][06965] Updated weights for policy 0, policy_version 39445 (0.0011) -[2024-07-05 13:31:08,722][06965] Updated weights for policy 0, policy_version 39455 (0.0009) -[2024-07-05 13:31:08,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 45641.2). Total num frames: 303210496. Throughput: 0: 12023.8. Samples: 800940. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:31:08,850][03445] Avg episode reward: [(0, '54.367')] -[2024-07-05 13:31:10,446][06965] Updated weights for policy 0, policy_version 39465 (0.0008) -[2024-07-05 13:31:12,170][06965] Updated weights for policy 0, policy_version 39475 (0.0008) -[2024-07-05 13:31:13,848][03445] Fps is (10 sec: 47513.9, 60 sec: 48196.3, 300 sec: 45766.0). Total num frames: 303448064. Throughput: 0: 12020.5. Samples: 837348. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:31:13,849][03445] Avg episode reward: [(0, '53.694')] -[2024-07-05 13:31:13,889][06965] Updated weights for policy 0, policy_version 39485 (0.0008) -[2024-07-05 13:31:15,619][06965] Updated weights for policy 0, policy_version 39495 (0.0008) -[2024-07-05 13:31:17,348][06965] Updated weights for policy 0, policy_version 39505 (0.0008) -[2024-07-05 13:31:18,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48196.2, 300 sec: 45977.6). Total num frames: 303693824. Throughput: 0: 11997.8. Samples: 909024. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:31:18,850][03445] Avg episode reward: [(0, '53.116')] -[2024-07-05 13:31:19,026][06965] Updated weights for policy 0, policy_version 39515 (0.0008) -[2024-07-05 13:31:20,699][06965] Updated weights for policy 0, policy_version 39525 (0.0008) -[2024-07-05 13:31:22,373][06965] Updated weights for policy 0, policy_version 39535 (0.0008) -[2024-07-05 13:31:23,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48059.8, 300 sec: 46068.0). Total num frames: 303931392. Throughput: 0: 11985.7. Samples: 981264. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:31:23,850][03445] Avg episode reward: [(0, '52.257')] -[2024-07-05 13:31:24,076][06965] Updated weights for policy 0, policy_version 39545 (0.0010) -[2024-07-05 13:31:25,757][06965] Updated weights for policy 0, policy_version 39555 (0.0010) -[2024-07-05 13:31:27,463][06965] Updated weights for policy 0, policy_version 39565 (0.0009) -[2024-07-05 13:31:28,849][03445] Fps is (10 sec: 47512.4, 60 sec: 47923.0, 300 sec: 46148.2). Total num frames: 304168960. Throughput: 0: 11999.7. Samples: 1017864. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:31:28,850][03445] Avg episode reward: [(0, '54.227')] -[2024-07-05 13:31:29,223][06965] Updated weights for policy 0, policy_version 39575 (0.0008) -[2024-07-05 13:31:30,921][06965] Updated weights for policy 0, policy_version 39585 (0.0013) -[2024-07-05 13:31:32,658][06965] Updated weights for policy 0, policy_version 39595 (0.0008) -[2024-07-05 13:31:33,849][03445] Fps is (10 sec: 47511.1, 60 sec: 47922.7, 300 sec: 46219.9). Total num frames: 304406528. Throughput: 0: 11972.0. Samples: 1089140. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:31:33,851][03445] Avg episode reward: [(0, '51.428')] -[2024-07-05 13:31:34,425][06965] Updated weights for policy 0, policy_version 39605 (0.0008) -[2024-07-05 13:31:36,146][06965] Updated weights for policy 0, policy_version 39615 (0.0008) -[2024-07-05 13:31:37,958][06965] Updated weights for policy 0, policy_version 39625 (0.0011) -[2024-07-05 13:31:38,849][03445] Fps is (10 sec: 47514.9, 60 sec: 47786.7, 300 sec: 46284.8). Total num frames: 304644096. Throughput: 0: 11928.2. Samples: 1158996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:31:38,850][03445] Avg episode reward: [(0, '52.000')] -[2024-07-05 13:31:39,661][06965] Updated weights for policy 0, policy_version 39635 (0.0008) -[2024-07-05 13:31:41,398][06965] Updated weights for policy 0, policy_version 39645 (0.0009) -[2024-07-05 13:31:43,111][06965] Updated weights for policy 0, policy_version 39655 (0.0008) -[2024-07-05 13:31:43,849][03445] Fps is (10 sec: 47514.5, 60 sec: 47786.4, 300 sec: 46343.2). Total num frames: 304881664. Throughput: 0: 11924.3. Samples: 1194356. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:31:43,850][03445] Avg episode reward: [(0, '54.357')] -[2024-07-05 13:31:44,790][06965] Updated weights for policy 0, policy_version 39665 (0.0007) -[2024-07-05 13:31:46,504][06965] Updated weights for policy 0, policy_version 39675 (0.0008) -[2024-07-05 13:31:48,191][06965] Updated weights for policy 0, policy_version 39685 (0.0011) -[2024-07-05 13:31:48,849][03445] Fps is (10 sec: 47513.5, 60 sec: 47650.1, 300 sec: 46396.5). Total num frames: 305119232. Throughput: 0: 11944.6. Samples: 1266844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:31:48,849][03445] Avg episode reward: [(0, '51.336')] -[2024-07-05 13:31:48,861][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039689_305127424.pth... -[2024-07-05 13:31:48,938][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000038506_295436288.pth -[2024-07-05 13:31:49,935][06965] Updated weights for policy 0, policy_version 39695 (0.0008) -[2024-07-05 13:31:51,617][06965] Updated weights for policy 0, policy_version 39705 (0.0008) -[2024-07-05 13:31:53,290][06965] Updated weights for policy 0, policy_version 39715 (0.0009) -[2024-07-05 13:31:53,849][03445] Fps is (10 sec: 48333.5, 60 sec: 47786.5, 300 sec: 46516.3). Total num frames: 305364992. Throughput: 0: 11964.4. Samples: 1339340. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:31:53,850][03445] Avg episode reward: [(0, '54.011')] -[2024-07-05 13:31:54,981][06965] Updated weights for policy 0, policy_version 39725 (0.0007) -[2024-07-05 13:31:56,653][06965] Updated weights for policy 0, policy_version 39735 (0.0008) -[2024-07-05 13:31:58,334][06965] Updated weights for policy 0, policy_version 39745 (0.0007) -[2024-07-05 13:31:58,849][03445] Fps is (10 sec: 48332.6, 60 sec: 47923.2, 300 sec: 46557.9). Total num frames: 305602560. Throughput: 0: 11963.8. Samples: 1375720. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:31:58,850][03445] Avg episode reward: [(0, '52.395')] -[2024-07-05 13:32:00,029][06965] Updated weights for policy 0, policy_version 39755 (0.0011) -[2024-07-05 13:32:01,740][06965] Updated weights for policy 0, policy_version 39765 (0.0008) -[2024-07-05 13:32:03,430][06965] Updated weights for policy 0, policy_version 39775 (0.0009) -[2024-07-05 13:32:03,849][03445] Fps is (10 sec: 48332.7, 60 sec: 47923.1, 300 sec: 46661.6). Total num frames: 305848320. Throughput: 0: 11990.0. Samples: 1448576. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:32:03,850][03445] Avg episode reward: [(0, '53.786')] -[2024-07-05 13:32:05,075][06965] Updated weights for policy 0, policy_version 39785 (0.0007) -[2024-07-05 13:32:06,748][06965] Updated weights for policy 0, policy_version 39795 (0.0011) -[2024-07-05 13:32:08,429][06965] Updated weights for policy 0, policy_version 39805 (0.0008) -[2024-07-05 13:32:08,849][03445] Fps is (10 sec: 49151.2, 60 sec: 48059.6, 300 sec: 46757.4). Total num frames: 306094080. Throughput: 0: 12007.1. Samples: 1521588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:32:08,850][03445] Avg episode reward: [(0, '53.373')] -[2024-07-05 13:32:10,094][06965] Updated weights for policy 0, policy_version 39815 (0.0008) -[2024-07-05 13:32:11,830][06965] Updated weights for policy 0, policy_version 39825 (0.0007) -[2024-07-05 13:32:13,520][06965] Updated weights for policy 0, policy_version 39835 (0.0008) -[2024-07-05 13:32:13,849][03445] Fps is (10 sec: 48333.6, 60 sec: 48059.7, 300 sec: 46785.4). Total num frames: 306331648. Throughput: 0: 12008.2. Samples: 1558228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:32:13,849][03445] Avg episode reward: [(0, '53.502')] -[2024-07-05 13:32:15,200][06965] Updated weights for policy 0, policy_version 39845 (0.0008) -[2024-07-05 13:32:16,866][06965] Updated weights for policy 0, policy_version 39855 (0.0007) -[2024-07-05 13:32:18,556][06965] Updated weights for policy 0, policy_version 39865 (0.0008) -[2024-07-05 13:32:18,849][03445] Fps is (10 sec: 48333.6, 60 sec: 48059.7, 300 sec: 46869.9). Total num frames: 306577408. Throughput: 0: 12042.6. Samples: 1631052. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:32:18,850][03445] Avg episode reward: [(0, '51.945')] -[2024-07-05 13:32:20,226][06965] Updated weights for policy 0, policy_version 39875 (0.0010) -[2024-07-05 13:32:21,878][06965] Updated weights for policy 0, policy_version 39885 (0.0007) -[2024-07-05 13:32:23,606][06965] Updated weights for policy 0, policy_version 39895 (0.0008) -[2024-07-05 13:32:23,849][03445] Fps is (10 sec: 49152.3, 60 sec: 48196.3, 300 sec: 46948.7). Total num frames: 306823168. Throughput: 0: 12114.0. Samples: 1704124. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:32:23,849][03445] Avg episode reward: [(0, '52.205')] -[2024-07-05 13:32:25,270][06965] Updated weights for policy 0, policy_version 39905 (0.0011) -[2024-07-05 13:32:26,949][06965] Updated weights for policy 0, policy_version 39915 (0.0008) -[2024-07-05 13:32:28,626][06965] Updated weights for policy 0, policy_version 39925 (0.0008) -[2024-07-05 13:32:28,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48333.0, 300 sec: 47022.1). Total num frames: 307068928. Throughput: 0: 12141.7. Samples: 1740728. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:32:28,850][03445] Avg episode reward: [(0, '54.451')] -[2024-07-05 13:32:30,298][06965] Updated weights for policy 0, policy_version 39935 (0.0008) -[2024-07-05 13:32:31,989][06965] Updated weights for policy 0, policy_version 39945 (0.0010) -[2024-07-05 13:32:33,724][06965] Updated weights for policy 0, policy_version 39955 (0.0010) -[2024-07-05 13:32:33,849][03445] Fps is (10 sec: 48332.3, 60 sec: 48333.2, 300 sec: 47037.9). Total num frames: 307306496. Throughput: 0: 12152.4. Samples: 1813704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:32:33,850][03445] Avg episode reward: [(0, '51.104')] -[2024-07-05 13:32:35,401][06965] Updated weights for policy 0, policy_version 39965 (0.0007) -[2024-07-05 13:32:37,071][06965] Updated weights for policy 0, policy_version 39975 (0.0008) -[2024-07-05 13:32:38,758][06965] Updated weights for policy 0, policy_version 39985 (0.0007) -[2024-07-05 13:32:38,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 47104.0). Total num frames: 307552256. Throughput: 0: 12159.2. Samples: 1886504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:32:38,850][03445] Avg episode reward: [(0, '52.971')] -[2024-07-05 13:32:40,424][06965] Updated weights for policy 0, policy_version 39995 (0.0008) -[2024-07-05 13:32:42,154][06965] Updated weights for policy 0, policy_version 40005 (0.0007) -[2024-07-05 13:32:43,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.5, 300 sec: 47116.4). Total num frames: 307789824. Throughput: 0: 12160.8. Samples: 1922956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:32:43,850][03445] Avg episode reward: [(0, '53.638')] -[2024-07-05 13:32:43,887][06965] Updated weights for policy 0, policy_version 40015 (0.0010) -[2024-07-05 13:32:45,529][06965] Updated weights for policy 0, policy_version 40025 (0.0009) -[2024-07-05 13:32:47,213][06965] Updated weights for policy 0, policy_version 40035 (0.0008) -[2024-07-05 13:32:48,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48605.9, 300 sec: 47176.3). Total num frames: 308035584. Throughput: 0: 12160.2. Samples: 1995784. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:32:48,850][03445] Avg episode reward: [(0, '53.772')] -[2024-07-05 13:32:48,876][06965] Updated weights for policy 0, policy_version 40045 (0.0008) -[2024-07-05 13:32:50,570][06965] Updated weights for policy 0, policy_version 40055 (0.0010) -[2024-07-05 13:32:52,232][06965] Updated weights for policy 0, policy_version 40065 (0.0008) -[2024-07-05 13:32:53,849][03445] Fps is (10 sec: 49152.3, 60 sec: 48606.0, 300 sec: 47232.7). Total num frames: 308281344. Throughput: 0: 12166.4. Samples: 2069076. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:32:53,850][03445] Avg episode reward: [(0, '52.592')] -[2024-07-05 13:32:53,907][06965] Updated weights for policy 0, policy_version 40075 (0.0010) -[2024-07-05 13:32:55,576][06965] Updated weights for policy 0, policy_version 40085 (0.0008) -[2024-07-05 13:32:57,241][06965] Updated weights for policy 0, policy_version 40095 (0.0009) -[2024-07-05 13:32:58,849][03445] Fps is (10 sec: 49151.0, 60 sec: 48742.3, 300 sec: 47286.0). Total num frames: 308527104. Throughput: 0: 12162.2. Samples: 2105528. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:32:58,850][03445] Avg episode reward: [(0, '53.471')] -[2024-07-05 13:32:58,963][06965] Updated weights for policy 0, policy_version 40105 (0.0008) -[2024-07-05 13:33:00,637][06965] Updated weights for policy 0, policy_version 40115 (0.0009) -[2024-07-05 13:33:02,314][06965] Updated weights for policy 0, policy_version 40125 (0.0010) -[2024-07-05 13:33:03,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48742.5, 300 sec: 47336.5). Total num frames: 308772864. Throughput: 0: 12167.7. Samples: 2178596. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:33:03,850][03445] Avg episode reward: [(0, '53.426')] -[2024-07-05 13:33:03,996][06965] Updated weights for policy 0, policy_version 40135 (0.0007) -[2024-07-05 13:33:05,676][06965] Updated weights for policy 0, policy_version 40145 (0.0008) -[2024-07-05 13:33:07,379][06965] Updated weights for policy 0, policy_version 40155 (0.0008) -[2024-07-05 13:33:08,848][03445] Fps is (10 sec: 48334.2, 60 sec: 48606.1, 300 sec: 47341.2). Total num frames: 309010432. Throughput: 0: 12165.9. Samples: 2251588. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:33:08,849][03445] Avg episode reward: [(0, '52.970')] -[2024-07-05 13:33:09,063][06965] Updated weights for policy 0, policy_version 40165 (0.0008) -[2024-07-05 13:33:10,722][06965] Updated weights for policy 0, policy_version 40175 (0.0008) -[2024-07-05 13:33:12,387][06965] Updated weights for policy 0, policy_version 40185 (0.0008) -[2024-07-05 13:33:13,849][03445] Fps is (10 sec: 48332.1, 60 sec: 48742.3, 300 sec: 47387.5). Total num frames: 309256192. Throughput: 0: 12168.1. Samples: 2288296. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:33:13,850][03445] Avg episode reward: [(0, '52.487')] -[2024-07-05 13:33:14,090][06965] Updated weights for policy 0, policy_version 40195 (0.0007) -[2024-07-05 13:33:15,821][06965] Updated weights for policy 0, policy_version 40205 (0.0010) -[2024-07-05 13:33:17,422][06965] Updated weights for policy 0, policy_version 40215 (0.0009) -[2024-07-05 13:33:18,849][03445] Fps is (10 sec: 49151.3, 60 sec: 48742.4, 300 sec: 47431.7). Total num frames: 309501952. Throughput: 0: 12165.8. Samples: 2361164. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:33:18,850][03445] Avg episode reward: [(0, '54.506')] -[2024-07-05 13:33:19,128][06965] Updated weights for policy 0, policy_version 40225 (0.0010) -[2024-07-05 13:33:20,819][06965] Updated weights for policy 0, policy_version 40235 (0.0010) -[2024-07-05 13:33:22,471][06965] Updated weights for policy 0, policy_version 40245 (0.0008) -[2024-07-05 13:33:23,849][03445] Fps is (10 sec: 49152.4, 60 sec: 48742.3, 300 sec: 47473.6). Total num frames: 309747712. Throughput: 0: 12169.2. Samples: 2434120. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:33:23,850][03445] Avg episode reward: [(0, '52.507')] -[2024-07-05 13:33:24,172][06965] Updated weights for policy 0, policy_version 40255 (0.0008) -[2024-07-05 13:33:25,878][06965] Updated weights for policy 0, policy_version 40265 (0.0007) -[2024-07-05 13:33:27,571][06965] Updated weights for policy 0, policy_version 40275 (0.0010) -[2024-07-05 13:33:28,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48605.9, 300 sec: 47474.6). Total num frames: 309985280. Throughput: 0: 12171.7. Samples: 2470680. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:33:28,849][03445] Avg episode reward: [(0, '54.460')] -[2024-07-05 13:33:29,215][06965] Updated weights for policy 0, policy_version 40285 (0.0009) -[2024-07-05 13:33:30,916][06965] Updated weights for policy 0, policy_version 40295 (0.0014) -[2024-07-05 13:33:32,609][06965] Updated weights for policy 0, policy_version 40305 (0.0012) -[2024-07-05 13:33:33,849][03445] Fps is (10 sec: 48332.2, 60 sec: 48742.3, 300 sec: 47513.6). Total num frames: 310231040. Throughput: 0: 12181.5. Samples: 2543956. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:33:33,850][03445] Avg episode reward: [(0, '52.894')] -[2024-07-05 13:33:34,273][06965] Updated weights for policy 0, policy_version 40315 (0.0008) -[2024-07-05 13:33:35,928][06965] Updated weights for policy 0, policy_version 40325 (0.0008) -[2024-07-05 13:33:37,634][06965] Updated weights for policy 0, policy_version 40335 (0.0010) -[2024-07-05 13:33:38,849][03445] Fps is (10 sec: 49151.3, 60 sec: 48742.3, 300 sec: 47550.8). Total num frames: 310476800. Throughput: 0: 12180.9. Samples: 2617216. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:33:38,850][03445] Avg episode reward: [(0, '54.238')] -[2024-07-05 13:33:39,336][06965] Updated weights for policy 0, policy_version 40345 (0.0008) -[2024-07-05 13:33:41,022][06965] Updated weights for policy 0, policy_version 40355 (0.0008) -[2024-07-05 13:33:42,681][06965] Updated weights for policy 0, policy_version 40365 (0.0008) -[2024-07-05 13:33:43,849][03445] Fps is (10 sec: 48333.5, 60 sec: 48742.5, 300 sec: 47550.0). Total num frames: 310714368. Throughput: 0: 12172.0. Samples: 2653268. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:33:43,849][03445] Avg episode reward: [(0, '53.464')] -[2024-07-05 13:33:44,359][06965] Updated weights for policy 0, policy_version 40375 (0.0007) -[2024-07-05 13:33:46,045][06965] Updated weights for policy 0, policy_version 40385 (0.0011) -[2024-07-05 13:33:47,677][06965] Updated weights for policy 0, policy_version 40395 (0.0011) -[2024-07-05 13:33:48,849][03445] Fps is (10 sec: 48333.3, 60 sec: 48742.4, 300 sec: 47584.8). Total num frames: 310960128. Throughput: 0: 12177.9. Samples: 2726600. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:33:48,850][03445] Avg episode reward: [(0, '52.861')] -[2024-07-05 13:33:48,855][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000040401_310960128.pth... -[2024-07-05 13:33:48,923][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039065_300015616.pth -[2024-07-05 13:33:49,363][06965] Updated weights for policy 0, policy_version 40405 (0.0010) -[2024-07-05 13:33:51,095][06965] Updated weights for policy 0, policy_version 40415 (0.0008) -[2024-07-05 13:33:52,771][06965] Updated weights for policy 0, policy_version 40425 (0.0007) -[2024-07-05 13:33:53,849][03445] Fps is (10 sec: 49152.0, 60 sec: 48742.4, 300 sec: 47618.2). Total num frames: 311205888. Throughput: 0: 12186.1. Samples: 2799964. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:33:53,850][03445] Avg episode reward: [(0, '52.410')] -[2024-07-05 13:33:54,488][06965] Updated weights for policy 0, policy_version 40435 (0.0009) -[2024-07-05 13:33:56,153][06965] Updated weights for policy 0, policy_version 40445 (0.0007) -[2024-07-05 13:33:57,814][06965] Updated weights for policy 0, policy_version 40455 (0.0008) -[2024-07-05 13:33:58,849][03445] Fps is (10 sec: 49152.1, 60 sec: 48742.6, 300 sec: 47650.1). Total num frames: 311451648. Throughput: 0: 12179.9. Samples: 2836388. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:33:58,850][03445] Avg episode reward: [(0, '53.687')] -[2024-07-05 13:33:59,474][06965] Updated weights for policy 0, policy_version 40465 (0.0009) -[2024-07-05 13:34:01,206][06965] Updated weights for policy 0, policy_version 40475 (0.0010) -[2024-07-05 13:34:02,893][06965] Updated weights for policy 0, policy_version 40485 (0.0007) -[2024-07-05 13:34:03,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48605.9, 300 sec: 47647.4). Total num frames: 311689216. Throughput: 0: 12173.3. Samples: 2908964. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:34:03,849][03445] Avg episode reward: [(0, '52.142')] -[2024-07-05 13:34:04,549][06965] Updated weights for policy 0, policy_version 40495 (0.0007) -[2024-07-05 13:34:06,231][06965] Updated weights for policy 0, policy_version 40505 (0.0008) -[2024-07-05 13:34:07,885][06965] Updated weights for policy 0, policy_version 40515 (0.0007) -[2024-07-05 13:34:08,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48742.3, 300 sec: 47677.5). Total num frames: 311934976. Throughput: 0: 12188.2. Samples: 2982588. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:34:08,849][03445] Avg episode reward: [(0, '53.219')] -[2024-07-05 13:34:09,555][06965] Updated weights for policy 0, policy_version 40525 (0.0008) -[2024-07-05 13:34:11,252][06965] Updated weights for policy 0, policy_version 40535 (0.0007) -[2024-07-05 13:34:12,917][06965] Updated weights for policy 0, policy_version 40545 (0.0010) -[2024-07-05 13:34:13,849][03445] Fps is (10 sec: 49151.4, 60 sec: 48742.4, 300 sec: 47706.3). Total num frames: 312180736. Throughput: 0: 12188.6. Samples: 3019168. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:34:13,850][03445] Avg episode reward: [(0, '53.627')] -[2024-07-05 13:34:14,573][06965] Updated weights for policy 0, policy_version 40555 (0.0008) -[2024-07-05 13:34:16,320][06965] Updated weights for policy 0, policy_version 40565 (0.0008) -[2024-07-05 13:34:18,006][06965] Updated weights for policy 0, policy_version 40575 (0.0009) -[2024-07-05 13:34:18,849][03445] Fps is (10 sec: 49152.0, 60 sec: 48742.4, 300 sec: 47734.2). Total num frames: 312426496. Throughput: 0: 12181.0. Samples: 3092100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:18,849][03445] Avg episode reward: [(0, '52.257')] -[2024-07-05 13:34:19,640][06965] Updated weights for policy 0, policy_version 40585 (0.0008) -[2024-07-05 13:34:21,334][06965] Updated weights for policy 0, policy_version 40595 (0.0008) -[2024-07-05 13:34:23,009][06965] Updated weights for policy 0, policy_version 40605 (0.0009) -[2024-07-05 13:34:23,849][03445] Fps is (10 sec: 48333.5, 60 sec: 48605.9, 300 sec: 47730.0). Total num frames: 312664064. Throughput: 0: 12177.9. Samples: 3165220. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:23,849][03445] Avg episode reward: [(0, '54.902')] -[2024-07-05 13:34:24,690][06965] Updated weights for policy 0, policy_version 40615 (0.0009) -[2024-07-05 13:34:26,364][06965] Updated weights for policy 0, policy_version 40625 (0.0007) -[2024-07-05 13:34:28,041][06965] Updated weights for policy 0, policy_version 40635 (0.0014) -[2024-07-05 13:34:28,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48742.4, 300 sec: 47756.3). Total num frames: 312909824. Throughput: 0: 12187.1. Samples: 3201688. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:28,849][03445] Avg episode reward: [(0, '52.785')] -[2024-07-05 13:34:29,705][06965] Updated weights for policy 0, policy_version 40645 (0.0007) -[2024-07-05 13:34:31,416][06965] Updated weights for policy 0, policy_version 40655 (0.0007) -[2024-07-05 13:34:33,131][06965] Updated weights for policy 0, policy_version 40665 (0.0009) -[2024-07-05 13:34:33,848][03445] Fps is (10 sec: 49152.2, 60 sec: 48742.6, 300 sec: 47781.7). Total num frames: 313155584. Throughput: 0: 12179.8. Samples: 3274688. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:33,850][03445] Avg episode reward: [(0, '56.094')] -[2024-07-05 13:34:33,851][06945] Saving new best policy, reward=56.094! -[2024-07-05 13:34:34,843][06965] Updated weights for policy 0, policy_version 40675 (0.0009) -[2024-07-05 13:34:36,543][06965] Updated weights for policy 0, policy_version 40685 (0.0008) -[2024-07-05 13:34:38,232][06965] Updated weights for policy 0, policy_version 40695 (0.0011) -[2024-07-05 13:34:38,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48605.9, 300 sec: 47776.9). Total num frames: 313393152. Throughput: 0: 12163.2. Samples: 3347308. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:38,849][03445] Avg episode reward: [(0, '51.213')] -[2024-07-05 13:34:39,890][06965] Updated weights for policy 0, policy_version 40705 (0.0008) -[2024-07-05 13:34:41,552][06965] Updated weights for policy 0, policy_version 40715 (0.0008) -[2024-07-05 13:34:43,247][06965] Updated weights for policy 0, policy_version 40725 (0.0008) -[2024-07-05 13:34:43,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48742.4, 300 sec: 47801.0). Total num frames: 313638912. Throughput: 0: 12166.5. Samples: 3383880. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:43,849][03445] Avg episode reward: [(0, '52.997')] -[2024-07-05 13:34:44,920][06965] Updated weights for policy 0, policy_version 40735 (0.0008) -[2024-07-05 13:34:46,602][06965] Updated weights for policy 0, policy_version 40745 (0.0009) -[2024-07-05 13:34:48,265][06965] Updated weights for policy 0, policy_version 40755 (0.0008) -[2024-07-05 13:34:48,849][03445] Fps is (10 sec: 49151.5, 60 sec: 48742.3, 300 sec: 47824.3). Total num frames: 313884672. Throughput: 0: 12185.8. Samples: 3457328. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:48,850][03445] Avg episode reward: [(0, '52.730')] -[2024-07-05 13:34:49,955][06965] Updated weights for policy 0, policy_version 40765 (0.0008) -[2024-07-05 13:34:51,646][06965] Updated weights for policy 0, policy_version 40775 (0.0008) -[2024-07-05 13:34:53,344][06965] Updated weights for policy 0, policy_version 40785 (0.0008) -[2024-07-05 13:34:53,849][03445] Fps is (10 sec: 49151.9, 60 sec: 48742.4, 300 sec: 47846.8). Total num frames: 314130432. Throughput: 0: 12161.6. Samples: 3529860. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:53,849][03445] Avg episode reward: [(0, '52.380')] -[2024-07-05 13:34:55,024][06965] Updated weights for policy 0, policy_version 40795 (0.0008) -[2024-07-05 13:34:56,742][06965] Updated weights for policy 0, policy_version 40805 (0.0007) -[2024-07-05 13:34:58,429][06965] Updated weights for policy 0, policy_version 40815 (0.0008) -[2024-07-05 13:34:58,849][03445] Fps is (10 sec: 48333.3, 60 sec: 48605.9, 300 sec: 48430.0). Total num frames: 314368000. Throughput: 0: 12158.3. Samples: 3566288. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:34:58,850][03445] Avg episode reward: [(0, '55.515')] -[2024-07-05 13:35:00,102][06965] Updated weights for policy 0, policy_version 40825 (0.0007) -[2024-07-05 13:35:01,805][06965] Updated weights for policy 0, policy_version 40835 (0.0008) -[2024-07-05 13:35:03,502][06965] Updated weights for policy 0, policy_version 40845 (0.0008) -[2024-07-05 13:35:03,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48742.3, 300 sec: 48457.7). Total num frames: 314613760. Throughput: 0: 12149.1. Samples: 3638812. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:35:03,850][03445] Avg episode reward: [(0, '54.044')] -[2024-07-05 13:35:05,170][06965] Updated weights for policy 0, policy_version 40855 (0.0008) -[2024-07-05 13:35:06,863][06965] Updated weights for policy 0, policy_version 40865 (0.0009) -[2024-07-05 13:35:08,543][06965] Updated weights for policy 0, policy_version 40875 (0.0009) -[2024-07-05 13:35:08,849][03445] Fps is (10 sec: 48332.3, 60 sec: 48605.7, 300 sec: 48457.7). Total num frames: 314851328. Throughput: 0: 12151.6. Samples: 3712044. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:08,850][03445] Avg episode reward: [(0, '51.877')] -[2024-07-05 13:35:10,206][06965] Updated weights for policy 0, policy_version 40885 (0.0008) -[2024-07-05 13:35:11,926][06965] Updated weights for policy 0, policy_version 40895 (0.0010) -[2024-07-05 13:35:13,589][06965] Updated weights for policy 0, policy_version 40905 (0.0008) -[2024-07-05 13:35:13,849][03445] Fps is (10 sec: 48333.4, 60 sec: 48606.0, 300 sec: 48457.8). Total num frames: 315097088. Throughput: 0: 12147.1. Samples: 3748308. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:13,850][03445] Avg episode reward: [(0, '54.302')] -[2024-07-05 13:35:15,261][06965] Updated weights for policy 0, policy_version 40915 (0.0010) -[2024-07-05 13:35:16,923][06965] Updated weights for policy 0, policy_version 40925 (0.0007) -[2024-07-05 13:35:18,626][06965] Updated weights for policy 0, policy_version 40935 (0.0010) -[2024-07-05 13:35:18,849][03445] Fps is (10 sec: 49152.5, 60 sec: 48605.8, 300 sec: 48457.8). Total num frames: 315342848. Throughput: 0: 12159.4. Samples: 3821860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:18,850][03445] Avg episode reward: [(0, '54.585')] -[2024-07-05 13:35:20,294][06965] Updated weights for policy 0, policy_version 40945 (0.0008) -[2024-07-05 13:35:22,017][06965] Updated weights for policy 0, policy_version 40955 (0.0008) -[2024-07-05 13:35:23,684][06965] Updated weights for policy 0, policy_version 40965 (0.0008) -[2024-07-05 13:35:23,849][03445] Fps is (10 sec: 49151.6, 60 sec: 48742.3, 300 sec: 48457.8). Total num frames: 315588608. Throughput: 0: 12162.9. Samples: 3894640. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:23,850][03445] Avg episode reward: [(0, '54.188')] -[2024-07-05 13:35:25,363][06965] Updated weights for policy 0, policy_version 40975 (0.0009) -[2024-07-05 13:35:27,067][06965] Updated weights for policy 0, policy_version 40985 (0.0008) -[2024-07-05 13:35:28,745][06965] Updated weights for policy 0, policy_version 40995 (0.0008) -[2024-07-05 13:35:28,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48605.8, 300 sec: 48457.8). Total num frames: 315826176. Throughput: 0: 12152.3. Samples: 3930732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:28,849][03445] Avg episode reward: [(0, '53.424')] -[2024-07-05 13:35:30,407][06965] Updated weights for policy 0, policy_version 41005 (0.0008) -[2024-07-05 13:35:32,080][06965] Updated weights for policy 0, policy_version 41015 (0.0007) -[2024-07-05 13:35:33,742][06965] Updated weights for policy 0, policy_version 41025 (0.0007) -[2024-07-05 13:35:33,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48605.8, 300 sec: 48457.8). Total num frames: 316071936. Throughput: 0: 12147.4. Samples: 4003960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:33,849][03445] Avg episode reward: [(0, '54.830')] -[2024-07-05 13:35:35,413][06965] Updated weights for policy 0, policy_version 41035 (0.0013) -[2024-07-05 13:35:37,094][06965] Updated weights for policy 0, policy_version 41045 (0.0008) -[2024-07-05 13:35:38,794][06965] Updated weights for policy 0, policy_version 41055 (0.0007) -[2024-07-05 13:35:38,849][03445] Fps is (10 sec: 49152.0, 60 sec: 48742.4, 300 sec: 48485.5). Total num frames: 316317696. Throughput: 0: 12161.9. Samples: 4077144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:38,850][03445] Avg episode reward: [(0, '51.923')] -[2024-07-05 13:35:40,493][06965] Updated weights for policy 0, policy_version 41065 (0.0008) -[2024-07-05 13:35:42,234][06965] Updated weights for policy 0, policy_version 41075 (0.0008) -[2024-07-05 13:35:43,849][03445] Fps is (10 sec: 48331.7, 60 sec: 48605.7, 300 sec: 48457.7). Total num frames: 316555264. Throughput: 0: 12161.8. Samples: 4113572. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:43,850][03445] Avg episode reward: [(0, '53.435')] -[2024-07-05 13:35:43,910][06965] Updated weights for policy 0, policy_version 41085 (0.0009) -[2024-07-05 13:35:45,620][06965] Updated weights for policy 0, policy_version 41095 (0.0007) -[2024-07-05 13:35:47,293][06965] Updated weights for policy 0, policy_version 41105 (0.0007) -[2024-07-05 13:35:48,849][03445] Fps is (10 sec: 48331.7, 60 sec: 48605.8, 300 sec: 48485.5). Total num frames: 316801024. Throughput: 0: 12161.7. Samples: 4186092. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:48,850][03445] Avg episode reward: [(0, '55.247')] -[2024-07-05 13:35:48,853][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000041114_316801024.pth... -[2024-07-05 13:35:48,923][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000039689_305127424.pth -[2024-07-05 13:35:48,987][06965] Updated weights for policy 0, policy_version 41115 (0.0010) -[2024-07-05 13:35:50,654][06965] Updated weights for policy 0, policy_version 41125 (0.0007) -[2024-07-05 13:35:52,332][06965] Updated weights for policy 0, policy_version 41135 (0.0007) -[2024-07-05 13:35:53,849][03445] Fps is (10 sec: 49152.9, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 317046784. Throughput: 0: 12155.0. Samples: 4259020. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:35:53,850][03445] Avg episode reward: [(0, '53.556')] -[2024-07-05 13:35:54,010][06965] Updated weights for policy 0, policy_version 41145 (0.0010) -[2024-07-05 13:35:55,721][06965] Updated weights for policy 0, policy_version 41155 (0.0008) -[2024-07-05 13:35:57,432][06965] Updated weights for policy 0, policy_version 41165 (0.0007) -[2024-07-05 13:35:58,849][03445] Fps is (10 sec: 48333.7, 60 sec: 48605.8, 300 sec: 48513.3). Total num frames: 317284352. Throughput: 0: 12154.1. Samples: 4295244. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:35:58,850][03445] Avg episode reward: [(0, '52.301')] -[2024-07-05 13:35:59,104][06965] Updated weights for policy 0, policy_version 41175 (0.0008) -[2024-07-05 13:36:00,805][06965] Updated weights for policy 0, policy_version 41185 (0.0008) -[2024-07-05 13:36:02,488][06965] Updated weights for policy 0, policy_version 41195 (0.0007) -[2024-07-05 13:36:03,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48606.0, 300 sec: 48541.1). Total num frames: 317530112. Throughput: 0: 12144.3. Samples: 4368352. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:03,850][03445] Avg episode reward: [(0, '53.126')] -[2024-07-05 13:36:04,174][06965] Updated weights for policy 0, policy_version 41205 (0.0008) -[2024-07-05 13:36:05,848][06965] Updated weights for policy 0, policy_version 41215 (0.0008) -[2024-07-05 13:36:07,528][06965] Updated weights for policy 0, policy_version 41225 (0.0009) -[2024-07-05 13:36:08,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48606.0, 300 sec: 48541.1). Total num frames: 317767680. Throughput: 0: 12138.9. Samples: 4440888. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:08,850][03445] Avg episode reward: [(0, '53.961')] -[2024-07-05 13:36:09,237][06965] Updated weights for policy 0, policy_version 41235 (0.0008) -[2024-07-05 13:36:10,911][06965] Updated weights for policy 0, policy_version 41245 (0.0013) -[2024-07-05 13:36:12,607][06965] Updated weights for policy 0, policy_version 41255 (0.0008) -[2024-07-05 13:36:13,849][03445] Fps is (10 sec: 48332.2, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 318013440. Throughput: 0: 12142.2. Samples: 4477132. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:13,850][03445] Avg episode reward: [(0, '54.127')] -[2024-07-05 13:36:14,323][06965] Updated weights for policy 0, policy_version 41265 (0.0008) -[2024-07-05 13:36:16,024][06965] Updated weights for policy 0, policy_version 41275 (0.0013) -[2024-07-05 13:36:17,699][06965] Updated weights for policy 0, policy_version 41285 (0.0007) -[2024-07-05 13:36:18,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 318251008. Throughput: 0: 12118.6. Samples: 4549296. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:18,849][03445] Avg episode reward: [(0, '56.320')] -[2024-07-05 13:36:18,899][06945] Saving new best policy, reward=56.320! -[2024-07-05 13:36:19,446][06965] Updated weights for policy 0, policy_version 41295 (0.0008) -[2024-07-05 13:36:21,117][06965] Updated weights for policy 0, policy_version 41305 (0.0009) -[2024-07-05 13:36:22,779][06965] Updated weights for policy 0, policy_version 41315 (0.0008) -[2024-07-05 13:36:23,849][03445] Fps is (10 sec: 48333.3, 60 sec: 48469.4, 300 sec: 48568.9). Total num frames: 318496768. Throughput: 0: 12115.5. Samples: 4622340. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:23,849][03445] Avg episode reward: [(0, '55.148')] -[2024-07-05 13:36:24,463][06965] Updated weights for policy 0, policy_version 41325 (0.0008) -[2024-07-05 13:36:26,127][06965] Updated weights for policy 0, policy_version 41335 (0.0007) -[2024-07-05 13:36:27,805][06965] Updated weights for policy 0, policy_version 41345 (0.0008) -[2024-07-05 13:36:28,849][03445] Fps is (10 sec: 49151.4, 60 sec: 48605.8, 300 sec: 48596.7). Total num frames: 318742528. Throughput: 0: 12119.1. Samples: 4658932. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:28,850][03445] Avg episode reward: [(0, '51.738')] -[2024-07-05 13:36:29,498][06965] Updated weights for policy 0, policy_version 41355 (0.0008) -[2024-07-05 13:36:31,219][06965] Updated weights for policy 0, policy_version 41365 (0.0008) -[2024-07-05 13:36:32,879][06965] Updated weights for policy 0, policy_version 41375 (0.0007) -[2024-07-05 13:36:33,849][03445] Fps is (10 sec: 48332.3, 60 sec: 48469.3, 300 sec: 48596.6). Total num frames: 318980096. Throughput: 0: 12125.4. Samples: 4731732. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:33,850][03445] Avg episode reward: [(0, '54.151')] -[2024-07-05 13:36:34,568][06965] Updated weights for policy 0, policy_version 41385 (0.0007) -[2024-07-05 13:36:36,257][06965] Updated weights for policy 0, policy_version 41395 (0.0010) -[2024-07-05 13:36:37,906][06965] Updated weights for policy 0, policy_version 41405 (0.0007) -[2024-07-05 13:36:38,849][03445] Fps is (10 sec: 48333.3, 60 sec: 48469.3, 300 sec: 48624.4). Total num frames: 319225856. Throughput: 0: 12127.3. Samples: 4804748. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:38,849][03445] Avg episode reward: [(0, '52.118')] -[2024-07-05 13:36:39,629][06965] Updated weights for policy 0, policy_version 41415 (0.0011) -[2024-07-05 13:36:41,315][06965] Updated weights for policy 0, policy_version 41425 (0.0010) -[2024-07-05 13:36:43,016][06965] Updated weights for policy 0, policy_version 41435 (0.0008) -[2024-07-05 13:36:43,849][03445] Fps is (10 sec: 49152.0, 60 sec: 48606.0, 300 sec: 48652.1). Total num frames: 319471616. Throughput: 0: 12128.7. Samples: 4841036. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:43,850][03445] Avg episode reward: [(0, '52.741')] -[2024-07-05 13:36:44,683][06965] Updated weights for policy 0, policy_version 41445 (0.0008) -[2024-07-05 13:36:46,388][06965] Updated weights for policy 0, policy_version 41455 (0.0012) -[2024-07-05 13:36:48,136][06965] Updated weights for policy 0, policy_version 41465 (0.0010) -[2024-07-05 13:36:48,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.5, 300 sec: 48624.4). Total num frames: 319709184. Throughput: 0: 12117.2. Samples: 4913628. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:36:48,850][03445] Avg episode reward: [(0, '55.029')] -[2024-07-05 13:36:49,777][06965] Updated weights for policy 0, policy_version 41475 (0.0007) -[2024-07-05 13:36:51,461][06965] Updated weights for policy 0, policy_version 41485 (0.0008) -[2024-07-05 13:36:53,149][06965] Updated weights for policy 0, policy_version 41495 (0.0007) -[2024-07-05 13:36:53,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48652.2). Total num frames: 319954944. Throughput: 0: 12119.2. Samples: 4986252. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:36:53,849][03445] Avg episode reward: [(0, '54.105')] -[2024-07-05 13:36:54,854][06965] Updated weights for policy 0, policy_version 41505 (0.0007) -[2024-07-05 13:36:56,582][06965] Updated weights for policy 0, policy_version 41515 (0.0007) -[2024-07-05 13:36:58,272][06965] Updated weights for policy 0, policy_version 41525 (0.0007) -[2024-07-05 13:36:58,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 48624.4). Total num frames: 320192512. Throughput: 0: 12114.7. Samples: 5022292. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:36:58,850][03445] Avg episode reward: [(0, '51.751')] -[2024-07-05 13:36:59,944][06965] Updated weights for policy 0, policy_version 41535 (0.0007) -[2024-07-05 13:37:01,635][06965] Updated weights for policy 0, policy_version 41545 (0.0008) -[2024-07-05 13:37:03,285][06965] Updated weights for policy 0, policy_version 41555 (0.0009) -[2024-07-05 13:37:03,848][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 48624.4). Total num frames: 320438272. Throughput: 0: 12134.1. Samples: 5095328. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:37:03,849][03445] Avg episode reward: [(0, '54.133')] -[2024-07-05 13:37:04,935][06965] Updated weights for policy 0, policy_version 41565 (0.0008) -[2024-07-05 13:37:06,628][06965] Updated weights for policy 0, policy_version 41575 (0.0010) -[2024-07-05 13:37:08,364][06965] Updated weights for policy 0, policy_version 41585 (0.0008) -[2024-07-05 13:37:08,849][03445] Fps is (10 sec: 49151.8, 60 sec: 48605.8, 300 sec: 48652.1). Total num frames: 320684032. Throughput: 0: 12133.3. Samples: 5168340. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:37:08,850][03445] Avg episode reward: [(0, '51.420')] -[2024-07-05 13:37:10,047][06965] Updated weights for policy 0, policy_version 41595 (0.0007) -[2024-07-05 13:37:11,740][06965] Updated weights for policy 0, policy_version 41605 (0.0014) -[2024-07-05 13:37:13,398][06965] Updated weights for policy 0, policy_version 41615 (0.0008) -[2024-07-05 13:37:13,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48624.4). Total num frames: 320921600. Throughput: 0: 12123.0. Samples: 5204464. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:37:13,850][03445] Avg episode reward: [(0, '55.239')] -[2024-07-05 13:37:15,089][06965] Updated weights for policy 0, policy_version 41625 (0.0008) -[2024-07-05 13:37:16,741][06965] Updated weights for policy 0, policy_version 41635 (0.0007) -[2024-07-05 13:37:18,447][06965] Updated weights for policy 0, policy_version 41645 (0.0008) -[2024-07-05 13:37:18,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48605.9, 300 sec: 48624.4). Total num frames: 321167360. Throughput: 0: 12135.0. Samples: 5277804. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:18,850][03445] Avg episode reward: [(0, '49.845')] -[2024-07-05 13:37:20,112][06965] Updated weights for policy 0, policy_version 41655 (0.0010) -[2024-07-05 13:37:21,805][06965] Updated weights for policy 0, policy_version 41665 (0.0007) -[2024-07-05 13:37:23,500][06965] Updated weights for policy 0, policy_version 41675 (0.0009) -[2024-07-05 13:37:23,849][03445] Fps is (10 sec: 49151.6, 60 sec: 48605.8, 300 sec: 48624.4). Total num frames: 321413120. Throughput: 0: 12127.3. Samples: 5350476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:23,850][03445] Avg episode reward: [(0, '54.873')] -[2024-07-05 13:37:25,172][06965] Updated weights for policy 0, policy_version 41685 (0.0010) -[2024-07-05 13:37:26,886][06965] Updated weights for policy 0, policy_version 41695 (0.0007) -[2024-07-05 13:37:28,557][06965] Updated weights for policy 0, policy_version 41705 (0.0007) -[2024-07-05 13:37:28,849][03445] Fps is (10 sec: 48332.2, 60 sec: 48469.3, 300 sec: 48624.4). Total num frames: 321650688. Throughput: 0: 12127.6. Samples: 5386776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:28,850][03445] Avg episode reward: [(0, '52.129')] -[2024-07-05 13:37:30,230][06965] Updated weights for policy 0, policy_version 41715 (0.0008) -[2024-07-05 13:37:31,965][06965] Updated weights for policy 0, policy_version 41725 (0.0008) -[2024-07-05 13:37:33,651][06965] Updated weights for policy 0, policy_version 41735 (0.0007) -[2024-07-05 13:37:33,849][03445] Fps is (10 sec: 48333.4, 60 sec: 48605.9, 300 sec: 48624.4). Total num frames: 321896448. Throughput: 0: 12141.5. Samples: 5459996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:33,850][03445] Avg episode reward: [(0, '52.675')] -[2024-07-05 13:37:35,301][06965] Updated weights for policy 0, policy_version 41745 (0.0011) -[2024-07-05 13:37:36,979][06965] Updated weights for policy 0, policy_version 41755 (0.0009) -[2024-07-05 13:37:38,690][06965] Updated weights for policy 0, policy_version 41765 (0.0007) -[2024-07-05 13:37:38,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 48624.4). Total num frames: 322134016. Throughput: 0: 12143.7. Samples: 5532720. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:38,849][03445] Avg episode reward: [(0, '51.913')] -[2024-07-05 13:37:40,379][06965] Updated weights for policy 0, policy_version 41775 (0.0007) -[2024-07-05 13:37:42,055][06965] Updated weights for policy 0, policy_version 41785 (0.0007) -[2024-07-05 13:37:43,724][06965] Updated weights for policy 0, policy_version 41795 (0.0009) -[2024-07-05 13:37:43,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48624.4). Total num frames: 322379776. Throughput: 0: 12154.7. Samples: 5569252. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:43,849][03445] Avg episode reward: [(0, '55.298')] -[2024-07-05 13:37:45,434][06965] Updated weights for policy 0, policy_version 41805 (0.0008) -[2024-07-05 13:37:47,126][06965] Updated weights for policy 0, policy_version 41815 (0.0010) -[2024-07-05 13:37:48,794][06965] Updated weights for policy 0, policy_version 41825 (0.0008) -[2024-07-05 13:37:48,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48605.8, 300 sec: 48624.4). Total num frames: 322625536. Throughput: 0: 12153.3. Samples: 5642228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:48,849][03445] Avg episode reward: [(0, '55.345')] -[2024-07-05 13:37:48,853][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000041825_322625536.pth... -[2024-07-05 13:37:48,918][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000040401_310960128.pth -[2024-07-05 13:37:50,488][06965] Updated weights for policy 0, policy_version 41835 (0.0008) -[2024-07-05 13:37:52,201][06965] Updated weights for policy 0, policy_version 41845 (0.0008) -[2024-07-05 13:37:53,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 48596.6). Total num frames: 322863104. Throughput: 0: 12136.4. Samples: 5714476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:53,850][03445] Avg episode reward: [(0, '52.820')] -[2024-07-05 13:37:53,904][06965] Updated weights for policy 0, policy_version 41855 (0.0008) -[2024-07-05 13:37:55,585][06965] Updated weights for policy 0, policy_version 41865 (0.0007) -[2024-07-05 13:37:57,295][06965] Updated weights for policy 0, policy_version 41875 (0.0008) -[2024-07-05 13:37:58,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48605.9, 300 sec: 48596.6). Total num frames: 323108864. Throughput: 0: 12144.4. Samples: 5750960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:37:58,850][03445] Avg episode reward: [(0, '53.611')] -[2024-07-05 13:37:58,948][06965] Updated weights for policy 0, policy_version 41885 (0.0010) -[2024-07-05 13:38:00,636][06965] Updated weights for policy 0, policy_version 41895 (0.0010) -[2024-07-05 13:38:02,322][06965] Updated weights for policy 0, policy_version 41905 (0.0007) -[2024-07-05 13:38:03,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48596.6). Total num frames: 323346432. Throughput: 0: 12126.4. Samples: 5823492. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:03,850][03445] Avg episode reward: [(0, '54.777')] -[2024-07-05 13:38:04,019][06965] Updated weights for policy 0, policy_version 41915 (0.0008) -[2024-07-05 13:38:05,740][06965] Updated weights for policy 0, policy_version 41925 (0.0008) -[2024-07-05 13:38:07,426][06965] Updated weights for policy 0, policy_version 41935 (0.0008) -[2024-07-05 13:38:08,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.3, 300 sec: 48596.6). Total num frames: 323592192. Throughput: 0: 12136.6. Samples: 5896624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:08,850][03445] Avg episode reward: [(0, '54.697')] -[2024-07-05 13:38:09,123][06965] Updated weights for policy 0, policy_version 41945 (0.0008) -[2024-07-05 13:38:10,766][06965] Updated weights for policy 0, policy_version 41955 (0.0009) -[2024-07-05 13:38:12,488][06965] Updated weights for policy 0, policy_version 41965 (0.0007) -[2024-07-05 13:38:13,849][03445] Fps is (10 sec: 49151.7, 60 sec: 48605.8, 300 sec: 48596.6). Total num frames: 323837952. Throughput: 0: 12138.1. Samples: 5932988. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:13,850][03445] Avg episode reward: [(0, '52.814')] -[2024-07-05 13:38:14,150][06965] Updated weights for policy 0, policy_version 41975 (0.0009) -[2024-07-05 13:38:15,837][06965] Updated weights for policy 0, policy_version 41985 (0.0007) -[2024-07-05 13:38:17,512][06965] Updated weights for policy 0, policy_version 41995 (0.0010) -[2024-07-05 13:38:18,849][03445] Fps is (10 sec: 49151.5, 60 sec: 48605.7, 300 sec: 48596.6). Total num frames: 324083712. Throughput: 0: 12128.6. Samples: 6005784. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:18,850][03445] Avg episode reward: [(0, '52.694')] -[2024-07-05 13:38:19,194][06965] Updated weights for policy 0, policy_version 42005 (0.0008) -[2024-07-05 13:38:20,918][06965] Updated weights for policy 0, policy_version 42015 (0.0009) -[2024-07-05 13:38:22,607][06965] Updated weights for policy 0, policy_version 42025 (0.0010) -[2024-07-05 13:38:23,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48596.6). Total num frames: 324321280. Throughput: 0: 12133.9. Samples: 6078744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:23,850][03445] Avg episode reward: [(0, '51.024')] -[2024-07-05 13:38:24,234][06965] Updated weights for policy 0, policy_version 42035 (0.0010) -[2024-07-05 13:38:25,961][06965] Updated weights for policy 0, policy_version 42045 (0.0008) -[2024-07-05 13:38:27,627][06965] Updated weights for policy 0, policy_version 42055 (0.0008) -[2024-07-05 13:38:28,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48605.8, 300 sec: 48596.6). Total num frames: 324567040. Throughput: 0: 12133.8. Samples: 6115272. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:28,850][03445] Avg episode reward: [(0, '51.378')] -[2024-07-05 13:38:29,313][06965] Updated weights for policy 0, policy_version 42065 (0.0008) -[2024-07-05 13:38:30,998][06965] Updated weights for policy 0, policy_version 42075 (0.0008) -[2024-07-05 13:38:32,732][06965] Updated weights for policy 0, policy_version 42085 (0.0008) -[2024-07-05 13:38:33,848][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48568.9). Total num frames: 324804608. Throughput: 0: 12117.5. Samples: 6187516. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:38:33,850][03445] Avg episode reward: [(0, '54.757')] -[2024-07-05 13:38:34,427][06965] Updated weights for policy 0, policy_version 42095 (0.0009) -[2024-07-05 13:38:36,111][06965] Updated weights for policy 0, policy_version 42105 (0.0011) -[2024-07-05 13:38:37,829][06965] Updated weights for policy 0, policy_version 42115 (0.0012) -[2024-07-05 13:38:38,849][03445] Fps is (10 sec: 48333.6, 60 sec: 48605.9, 300 sec: 48596.6). Total num frames: 325050368. Throughput: 0: 12137.8. Samples: 6260676. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:38:38,850][03445] Avg episode reward: [(0, '56.038')] -[2024-07-05 13:38:39,500][06965] Updated weights for policy 0, policy_version 42125 (0.0007) -[2024-07-05 13:38:41,226][06965] Updated weights for policy 0, policy_version 42135 (0.0010) -[2024-07-05 13:38:42,904][06965] Updated weights for policy 0, policy_version 42145 (0.0007) -[2024-07-05 13:38:43,849][03445] Fps is (10 sec: 48331.4, 60 sec: 48469.2, 300 sec: 48568.8). Total num frames: 325287936. Throughput: 0: 12129.0. Samples: 6296768. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:38:43,851][03445] Avg episode reward: [(0, '51.753')] -[2024-07-05 13:38:44,577][06965] Updated weights for policy 0, policy_version 42155 (0.0010) -[2024-07-05 13:38:46,277][06965] Updated weights for policy 0, policy_version 42165 (0.0008) -[2024-07-05 13:38:47,918][06965] Updated weights for policy 0, policy_version 42175 (0.0008) -[2024-07-05 13:38:48,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.4, 300 sec: 48568.8). Total num frames: 325533696. Throughput: 0: 12142.4. Samples: 6369900. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:38:48,850][03445] Avg episode reward: [(0, '54.890')] -[2024-07-05 13:38:49,594][06965] Updated weights for policy 0, policy_version 42185 (0.0007) -[2024-07-05 13:38:51,265][06965] Updated weights for policy 0, policy_version 42195 (0.0010) -[2024-07-05 13:38:52,971][06965] Updated weights for policy 0, policy_version 42205 (0.0008) -[2024-07-05 13:38:53,849][03445] Fps is (10 sec: 48333.6, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 325771264. Throughput: 0: 12130.1. Samples: 6442480. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:38:53,849][03445] Avg episode reward: [(0, '54.197')] -[2024-07-05 13:38:54,664][06965] Updated weights for policy 0, policy_version 42215 (0.0008) -[2024-07-05 13:38:56,330][06965] Updated weights for policy 0, policy_version 42225 (0.0008) -[2024-07-05 13:38:58,068][06965] Updated weights for policy 0, policy_version 42235 (0.0008) -[2024-07-05 13:38:58,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48568.8). Total num frames: 326017024. Throughput: 0: 12129.9. Samples: 6478832. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:38:58,850][03445] Avg episode reward: [(0, '52.441')] -[2024-07-05 13:38:59,751][06965] Updated weights for policy 0, policy_version 42245 (0.0008) -[2024-07-05 13:39:01,469][06965] Updated weights for policy 0, policy_version 42255 (0.0008) -[2024-07-05 13:39:03,126][06965] Updated weights for policy 0, policy_version 42265 (0.0008) -[2024-07-05 13:39:03,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48568.8). Total num frames: 326262784. Throughput: 0: 12135.2. Samples: 6551868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:39:03,850][03445] Avg episode reward: [(0, '51.351')] -[2024-07-05 13:39:04,828][06965] Updated weights for policy 0, policy_version 42275 (0.0007) -[2024-07-05 13:39:06,500][06965] Updated weights for policy 0, policy_version 42285 (0.0008) -[2024-07-05 13:39:08,145][06965] Updated weights for policy 0, policy_version 42295 (0.0008) -[2024-07-05 13:39:08,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48568.9). Total num frames: 326508544. Throughput: 0: 12136.0. Samples: 6624864. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:39:08,850][03445] Avg episode reward: [(0, '52.450')] -[2024-07-05 13:39:09,776][06965] Updated weights for policy 0, policy_version 42305 (0.0007) -[2024-07-05 13:39:11,506][06965] Updated weights for policy 0, policy_version 42315 (0.0008) -[2024-07-05 13:39:13,203][06965] Updated weights for policy 0, policy_version 42325 (0.0008) -[2024-07-05 13:39:13,848][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 326746112. Throughput: 0: 12127.9. Samples: 6661024. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:39:13,849][03445] Avg episode reward: [(0, '52.070')] -[2024-07-05 13:39:14,878][06965] Updated weights for policy 0, policy_version 42335 (0.0008) -[2024-07-05 13:39:16,643][06965] Updated weights for policy 0, policy_version 42345 (0.0008) -[2024-07-05 13:39:18,323][06965] Updated weights for policy 0, policy_version 42355 (0.0008) -[2024-07-05 13:39:18,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48568.8). Total num frames: 326991872. Throughput: 0: 12134.1. Samples: 6733552. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:39:18,850][03445] Avg episode reward: [(0, '53.814')] -[2024-07-05 13:39:19,987][06965] Updated weights for policy 0, policy_version 42365 (0.0008) -[2024-07-05 13:39:21,655][06965] Updated weights for policy 0, policy_version 42375 (0.0011) -[2024-07-05 13:39:23,377][06965] Updated weights for policy 0, policy_version 42385 (0.0008) -[2024-07-05 13:39:23,848][03445] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 327229440. Throughput: 0: 12127.1. Samples: 6806396. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:39:23,850][03445] Avg episode reward: [(0, '53.790')] -[2024-07-05 13:39:25,025][06965] Updated weights for policy 0, policy_version 42395 (0.0007) -[2024-07-05 13:39:26,720][06965] Updated weights for policy 0, policy_version 42405 (0.0008) -[2024-07-05 13:39:28,399][06965] Updated weights for policy 0, policy_version 42415 (0.0010) -[2024-07-05 13:39:28,849][03445] Fps is (10 sec: 48331.8, 60 sec: 48469.2, 300 sec: 48541.0). Total num frames: 327475200. Throughput: 0: 12137.9. Samples: 6842972. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:39:28,850][03445] Avg episode reward: [(0, '56.186')] -[2024-07-05 13:39:30,097][06965] Updated weights for policy 0, policy_version 42425 (0.0007) -[2024-07-05 13:39:31,813][06965] Updated weights for policy 0, policy_version 42435 (0.0010) -[2024-07-05 13:39:33,445][06965] Updated weights for policy 0, policy_version 42445 (0.0007) -[2024-07-05 13:39:33,849][03445] Fps is (10 sec: 49151.4, 60 sec: 48605.8, 300 sec: 48568.8). Total num frames: 327720960. Throughput: 0: 12139.3. Samples: 6916168. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:39:33,849][03445] Avg episode reward: [(0, '51.079')] -[2024-07-05 13:39:35,149][06965] Updated weights for policy 0, policy_version 42455 (0.0008) -[2024-07-05 13:39:36,854][06965] Updated weights for policy 0, policy_version 42465 (0.0008) -[2024-07-05 13:39:38,550][06965] Updated weights for policy 0, policy_version 42475 (0.0010) -[2024-07-05 13:39:38,849][03445] Fps is (10 sec: 48333.7, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 327958528. Throughput: 0: 12139.2. Samples: 6988744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:39:38,850][03445] Avg episode reward: [(0, '53.259')] -[2024-07-05 13:39:40,251][06965] Updated weights for policy 0, policy_version 42485 (0.0008) -[2024-07-05 13:39:41,914][06965] Updated weights for policy 0, policy_version 42495 (0.0008) -[2024-07-05 13:39:43,593][06965] Updated weights for policy 0, policy_version 42505 (0.0010) -[2024-07-05 13:39:43,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48606.1, 300 sec: 48541.1). Total num frames: 328204288. Throughput: 0: 12141.3. Samples: 7025192. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:39:43,850][03445] Avg episode reward: [(0, '52.238')] -[2024-07-05 13:39:45,272][06965] Updated weights for policy 0, policy_version 42515 (0.0007) -[2024-07-05 13:39:46,951][06965] Updated weights for policy 0, policy_version 42525 (0.0008) -[2024-07-05 13:39:48,657][06965] Updated weights for policy 0, policy_version 42535 (0.0011) -[2024-07-05 13:39:48,849][03445] Fps is (10 sec: 49152.3, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 328450048. Throughput: 0: 12134.6. Samples: 7097924. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:39:48,850][03445] Avg episode reward: [(0, '52.973')] -[2024-07-05 13:39:48,853][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000042536_328450048.pth... -[2024-07-05 13:39:48,921][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000041114_316801024.pth -[2024-07-05 13:39:50,368][06965] Updated weights for policy 0, policy_version 42545 (0.0008) -[2024-07-05 13:39:52,042][06965] Updated weights for policy 0, policy_version 42555 (0.0010) -[2024-07-05 13:39:53,743][06965] Updated weights for policy 0, policy_version 42565 (0.0008) -[2024-07-05 13:39:53,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 328687616. Throughput: 0: 12113.5. Samples: 7169972. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:39:53,850][03445] Avg episode reward: [(0, '54.587')] -[2024-07-05 13:39:55,437][06965] Updated weights for policy 0, policy_version 42575 (0.0007) -[2024-07-05 13:39:57,171][06965] Updated weights for policy 0, policy_version 42585 (0.0008) -[2024-07-05 13:39:58,849][03445] Fps is (10 sec: 47513.8, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 328925184. Throughput: 0: 12125.2. Samples: 7206660. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:39:58,850][06965] Updated weights for policy 0, policy_version 42595 (0.0008) -[2024-07-05 13:39:58,849][03445] Avg episode reward: [(0, '51.642')] -[2024-07-05 13:40:00,563][06965] Updated weights for policy 0, policy_version 42605 (0.0008) -[2024-07-05 13:40:02,201][06965] Updated weights for policy 0, policy_version 42615 (0.0008) -[2024-07-05 13:40:03,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 48541.1). Total num frames: 329170944. Throughput: 0: 12129.3. Samples: 7279368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:40:03,850][03445] Avg episode reward: [(0, '51.897')] -[2024-07-05 13:40:03,888][06965] Updated weights for policy 0, policy_version 42625 (0.0012) -[2024-07-05 13:40:05,588][06965] Updated weights for policy 0, policy_version 42635 (0.0009) -[2024-07-05 13:40:07,292][06965] Updated weights for policy 0, policy_version 42645 (0.0010) -[2024-07-05 13:40:08,849][03445] Fps is (10 sec: 49151.4, 60 sec: 48469.2, 300 sec: 48541.1). Total num frames: 329416704. Throughput: 0: 12130.4. Samples: 7352268. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:40:08,850][03445] Avg episode reward: [(0, '52.415')] -[2024-07-05 13:40:08,957][06965] Updated weights for policy 0, policy_version 42655 (0.0008) -[2024-07-05 13:40:10,642][06965] Updated weights for policy 0, policy_version 42665 (0.0007) -[2024-07-05 13:40:12,294][06965] Updated weights for policy 0, policy_version 42675 (0.0008) -[2024-07-05 13:40:13,848][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 329654272. Throughput: 0: 12129.1. Samples: 7388776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:40:13,849][03445] Avg episode reward: [(0, '52.538')] -[2024-07-05 13:40:14,000][06965] Updated weights for policy 0, policy_version 42685 (0.0007) -[2024-07-05 13:40:15,678][06965] Updated weights for policy 0, policy_version 42695 (0.0008) -[2024-07-05 13:40:17,379][06965] Updated weights for policy 0, policy_version 42705 (0.0008) -[2024-07-05 13:40:18,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 329900032. Throughput: 0: 12116.1. Samples: 7461392. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:40:18,850][03445] Avg episode reward: [(0, '52.705')] -[2024-07-05 13:40:19,091][06965] Updated weights for policy 0, policy_version 42715 (0.0011) -[2024-07-05 13:40:20,761][06965] Updated weights for policy 0, policy_version 42725 (0.0008) -[2024-07-05 13:40:22,441][06965] Updated weights for policy 0, policy_version 42735 (0.0008) -[2024-07-05 13:40:23,849][03445] Fps is (10 sec: 49151.1, 60 sec: 48605.7, 300 sec: 48541.1). Total num frames: 330145792. Throughput: 0: 12116.4. Samples: 7533984. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:40:23,850][03445] Avg episode reward: [(0, '52.833')] -[2024-07-05 13:40:24,154][06965] Updated weights for policy 0, policy_version 42745 (0.0008) -[2024-07-05 13:40:25,852][06965] Updated weights for policy 0, policy_version 42755 (0.0008) -[2024-07-05 13:40:27,565][06965] Updated weights for policy 0, policy_version 42765 (0.0009) -[2024-07-05 13:40:28,848][03445] Fps is (10 sec: 48333.6, 60 sec: 48469.6, 300 sec: 48513.3). Total num frames: 330383360. Throughput: 0: 12112.3. Samples: 7570244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:40:28,849][03445] Avg episode reward: [(0, '52.737')] -[2024-07-05 13:40:29,228][06965] Updated weights for policy 0, policy_version 42775 (0.0010) -[2024-07-05 13:40:30,917][06965] Updated weights for policy 0, policy_version 42785 (0.0007) -[2024-07-05 13:40:32,590][06965] Updated weights for policy 0, policy_version 42795 (0.0007) -[2024-07-05 13:40:33,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 330629120. Throughput: 0: 12122.6. Samples: 7643440. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:40:33,849][03445] Avg episode reward: [(0, '54.143')] -[2024-07-05 13:40:34,286][06965] Updated weights for policy 0, policy_version 42805 (0.0008) -[2024-07-05 13:40:36,010][06965] Updated weights for policy 0, policy_version 42815 (0.0007) -[2024-07-05 13:40:37,695][06965] Updated weights for policy 0, policy_version 42825 (0.0007) -[2024-07-05 13:40:38,849][03445] Fps is (10 sec: 49151.7, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 330874880. Throughput: 0: 12133.6. Samples: 7715984. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:40:38,849][03445] Avg episode reward: [(0, '50.664')] -[2024-07-05 13:40:39,431][06965] Updated weights for policy 0, policy_version 42835 (0.0010) -[2024-07-05 13:40:41,075][06965] Updated weights for policy 0, policy_version 42845 (0.0011) -[2024-07-05 13:40:42,762][06965] Updated weights for policy 0, policy_version 42855 (0.0008) -[2024-07-05 13:40:43,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 331112448. Throughput: 0: 12130.6. Samples: 7752536. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:40:43,850][03445] Avg episode reward: [(0, '50.681')] -[2024-07-05 13:40:44,400][06965] Updated weights for policy 0, policy_version 42865 (0.0007) -[2024-07-05 13:40:46,094][06965] Updated weights for policy 0, policy_version 42875 (0.0007) -[2024-07-05 13:40:47,772][06965] Updated weights for policy 0, policy_version 42885 (0.0007) -[2024-07-05 13:40:48,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 331358208. Throughput: 0: 12134.5. Samples: 7825420. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:40:48,849][03445] Avg episode reward: [(0, '54.159')] -[2024-07-05 13:40:49,466][06965] Updated weights for policy 0, policy_version 42895 (0.0007) -[2024-07-05 13:40:51,138][06965] Updated weights for policy 0, policy_version 42905 (0.0008) -[2024-07-05 13:40:52,816][06965] Updated weights for policy 0, policy_version 42915 (0.0010) -[2024-07-05 13:40:53,848][03445] Fps is (10 sec: 48333.6, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 331595776. Throughput: 0: 12126.0. Samples: 7897936. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:40:53,850][03445] Avg episode reward: [(0, '52.952')] -[2024-07-05 13:40:54,549][06965] Updated weights for policy 0, policy_version 42925 (0.0007) -[2024-07-05 13:40:56,225][06965] Updated weights for policy 0, policy_version 42935 (0.0007) -[2024-07-05 13:40:57,917][06965] Updated weights for policy 0, policy_version 42945 (0.0008) -[2024-07-05 13:40:58,848][03445] Fps is (10 sec: 48332.8, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 331841536. Throughput: 0: 12124.0. Samples: 7934356. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:40:58,849][03445] Avg episode reward: [(0, '53.888')] -[2024-07-05 13:40:59,644][06965] Updated weights for policy 0, policy_version 42955 (0.0008) -[2024-07-05 13:41:01,296][06965] Updated weights for policy 0, policy_version 42965 (0.0010) -[2024-07-05 13:41:03,033][06965] Updated weights for policy 0, policy_version 42975 (0.0008) -[2024-07-05 13:41:03,848][03445] Fps is (10 sec: 49151.8, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 332087296. Throughput: 0: 12125.1. Samples: 8007020. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:41:03,849][03445] Avg episode reward: [(0, '51.501')] -[2024-07-05 13:41:04,680][06965] Updated weights for policy 0, policy_version 42985 (0.0008) -[2024-07-05 13:41:06,344][06965] Updated weights for policy 0, policy_version 42995 (0.0008) -[2024-07-05 13:41:08,046][06965] Updated weights for policy 0, policy_version 43005 (0.0013) -[2024-07-05 13:41:08,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 332324864. Throughput: 0: 12131.0. Samples: 8079876. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:41:08,850][03445] Avg episode reward: [(0, '52.157')] -[2024-07-05 13:41:09,716][06965] Updated weights for policy 0, policy_version 43015 (0.0009) -[2024-07-05 13:41:11,422][06965] Updated weights for policy 0, policy_version 43025 (0.0008) -[2024-07-05 13:41:13,087][06965] Updated weights for policy 0, policy_version 43035 (0.0008) -[2024-07-05 13:41:13,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 332570624. Throughput: 0: 12134.9. Samples: 8116316. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 13:41:13,849][03445] Avg episode reward: [(0, '52.347')] -[2024-07-05 13:41:14,783][06965] Updated weights for policy 0, policy_version 43045 (0.0008) -[2024-07-05 13:41:16,473][06965] Updated weights for policy 0, policy_version 43055 (0.0008) -[2024-07-05 13:41:18,186][06965] Updated weights for policy 0, policy_version 43065 (0.0011) -[2024-07-05 13:41:18,849][03445] Fps is (10 sec: 48332.4, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 332808192. Throughput: 0: 12129.5. Samples: 8189268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:41:18,850][03445] Avg episode reward: [(0, '49.967')] -[2024-07-05 13:41:19,870][06965] Updated weights for policy 0, policy_version 43075 (0.0007) -[2024-07-05 13:41:21,549][06965] Updated weights for policy 0, policy_version 43085 (0.0008) -[2024-07-05 13:41:23,261][06965] Updated weights for policy 0, policy_version 43095 (0.0008) -[2024-07-05 13:41:23,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 333053952. Throughput: 0: 12129.5. Samples: 8261812. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:41:23,850][03445] Avg episode reward: [(0, '52.316')] -[2024-07-05 13:41:24,935][06965] Updated weights for policy 0, policy_version 43105 (0.0007) -[2024-07-05 13:41:26,662][06965] Updated weights for policy 0, policy_version 43115 (0.0008) -[2024-07-05 13:41:28,348][06965] Updated weights for policy 0, policy_version 43125 (0.0008) -[2024-07-05 13:41:28,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 333291520. Throughput: 0: 12124.6. Samples: 8298144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:41:28,850][03445] Avg episode reward: [(0, '53.950')] -[2024-07-05 13:41:30,064][06965] Updated weights for policy 0, policy_version 43135 (0.0007) -[2024-07-05 13:41:31,749][06965] Updated weights for policy 0, policy_version 43145 (0.0009) -[2024-07-05 13:41:33,462][06965] Updated weights for policy 0, policy_version 43155 (0.0008) -[2024-07-05 13:41:33,848][03445] Fps is (10 sec: 48333.4, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 333537280. Throughput: 0: 12117.3. Samples: 8370696. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:41:33,849][03445] Avg episode reward: [(0, '53.600')] -[2024-07-05 13:41:35,102][06965] Updated weights for policy 0, policy_version 43165 (0.0008) -[2024-07-05 13:41:36,822][06965] Updated weights for policy 0, policy_version 43175 (0.0011) -[2024-07-05 13:41:38,490][06965] Updated weights for policy 0, policy_version 43185 (0.0007) -[2024-07-05 13:41:38,849][03445] Fps is (10 sec: 49152.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 333783040. Throughput: 0: 12123.2. Samples: 8443480. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:41:38,850][03445] Avg episode reward: [(0, '53.075')] -[2024-07-05 13:41:40,151][06965] Updated weights for policy 0, policy_version 43195 (0.0008) -[2024-07-05 13:41:41,844][06965] Updated weights for policy 0, policy_version 43205 (0.0007) -[2024-07-05 13:41:43,545][06965] Updated weights for policy 0, policy_version 43215 (0.0008) -[2024-07-05 13:41:43,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 334020608. Throughput: 0: 12120.4. Samples: 8479776. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:41:43,849][03445] Avg episode reward: [(0, '53.723')] -[2024-07-05 13:41:45,237][06965] Updated weights for policy 0, policy_version 43225 (0.0010) -[2024-07-05 13:41:46,932][06965] Updated weights for policy 0, policy_version 43235 (0.0008) -[2024-07-05 13:41:48,609][06965] Updated weights for policy 0, policy_version 43245 (0.0008) -[2024-07-05 13:41:48,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 334266368. Throughput: 0: 12123.5. Samples: 8552580. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:41:48,849][03445] Avg episode reward: [(0, '55.387')] -[2024-07-05 13:41:48,854][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000043246_334266368.pth... -[2024-07-05 13:41:48,927][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000041825_322625536.pth -[2024-07-05 13:41:50,301][06965] Updated weights for policy 0, policy_version 43255 (0.0008) -[2024-07-05 13:41:51,971][06965] Updated weights for policy 0, policy_version 43265 (0.0008) -[2024-07-05 13:41:53,697][06965] Updated weights for policy 0, policy_version 43275 (0.0008) -[2024-07-05 13:41:53,849][03445] Fps is (10 sec: 49151.9, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 334512128. Throughput: 0: 12124.7. Samples: 8625488. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:41:53,850][03445] Avg episode reward: [(0, '50.524')] -[2024-07-05 13:41:55,391][06965] Updated weights for policy 0, policy_version 43285 (0.0008) -[2024-07-05 13:41:57,059][06965] Updated weights for policy 0, policy_version 43295 (0.0008) -[2024-07-05 13:41:58,709][06965] Updated weights for policy 0, policy_version 43305 (0.0008) -[2024-07-05 13:41:58,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 334749696. Throughput: 0: 12122.8. Samples: 8661844. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:41:58,850][03445] Avg episode reward: [(0, '53.520')] -[2024-07-05 13:42:00,428][06965] Updated weights for policy 0, policy_version 43315 (0.0008) -[2024-07-05 13:42:02,106][06965] Updated weights for policy 0, policy_version 43325 (0.0008) -[2024-07-05 13:42:03,823][06965] Updated weights for policy 0, policy_version 43335 (0.0010) -[2024-07-05 13:42:03,849][03445] Fps is (10 sec: 48332.3, 60 sec: 48469.2, 300 sec: 48513.3). Total num frames: 334995456. Throughput: 0: 12125.0. Samples: 8734892. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:42:03,850][03445] Avg episode reward: [(0, '52.091')] -[2024-07-05 13:42:05,495][06965] Updated weights for policy 0, policy_version 43345 (0.0007) -[2024-07-05 13:42:07,148][06965] Updated weights for policy 0, policy_version 43355 (0.0007) -[2024-07-05 13:42:08,835][06965] Updated weights for policy 0, policy_version 43365 (0.0009) -[2024-07-05 13:42:08,849][03445] Fps is (10 sec: 49151.7, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 335241216. Throughput: 0: 12127.1. Samples: 8807532. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 13:42:08,849][03445] Avg episode reward: [(0, '53.532')] -[2024-07-05 13:42:10,518][06965] Updated weights for policy 0, policy_version 43375 (0.0007) -[2024-07-05 13:42:12,204][06965] Updated weights for policy 0, policy_version 43385 (0.0009) -[2024-07-05 13:42:13,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 335478784. Throughput: 0: 12128.9. Samples: 8843944. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:42:13,849][03445] Avg episode reward: [(0, '53.744')] -[2024-07-05 13:42:13,902][06965] Updated weights for policy 0, policy_version 43395 (0.0008) -[2024-07-05 13:42:15,621][06965] Updated weights for policy 0, policy_version 43405 (0.0007) -[2024-07-05 13:42:17,328][06965] Updated weights for policy 0, policy_version 43415 (0.0007) -[2024-07-05 13:42:18,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48606.0, 300 sec: 48513.3). Total num frames: 335724544. Throughput: 0: 12133.1. Samples: 8916688. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:42:18,850][03445] Avg episode reward: [(0, '54.108')] -[2024-07-05 13:42:19,006][06965] Updated weights for policy 0, policy_version 43425 (0.0007) -[2024-07-05 13:42:20,672][06965] Updated weights for policy 0, policy_version 43435 (0.0008) -[2024-07-05 13:42:22,335][06965] Updated weights for policy 0, policy_version 43445 (0.0008) -[2024-07-05 13:42:23,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 335962112. Throughput: 0: 12125.7. Samples: 8989136. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:42:23,849][03445] Avg episode reward: [(0, '53.268')] -[2024-07-05 13:42:24,035][06965] Updated weights for policy 0, policy_version 43455 (0.0010) -[2024-07-05 13:42:25,772][06965] Updated weights for policy 0, policy_version 43465 (0.0007) -[2024-07-05 13:42:27,466][06965] Updated weights for policy 0, policy_version 43475 (0.0008) -[2024-07-05 13:42:28,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 336207872. Throughput: 0: 12131.1. Samples: 9025676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:42:28,850][03445] Avg episode reward: [(0, '54.650')] -[2024-07-05 13:42:29,169][06965] Updated weights for policy 0, policy_version 43485 (0.0007) -[2024-07-05 13:42:30,853][06965] Updated weights for policy 0, policy_version 43495 (0.0008) -[2024-07-05 13:42:32,485][06965] Updated weights for policy 0, policy_version 43505 (0.0008) -[2024-07-05 13:42:33,849][03445] Fps is (10 sec: 49152.0, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 336453632. Throughput: 0: 12133.5. Samples: 9098588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:42:33,850][03445] Avg episode reward: [(0, '54.774')] -[2024-07-05 13:42:34,130][06965] Updated weights for policy 0, policy_version 43515 (0.0007) -[2024-07-05 13:42:35,820][06965] Updated weights for policy 0, policy_version 43525 (0.0007) -[2024-07-05 13:42:37,556][06965] Updated weights for policy 0, policy_version 43535 (0.0008) -[2024-07-05 13:42:38,848][03445] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 336691200. Throughput: 0: 12134.6. Samples: 9171544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 13:42:38,849][03445] Avg episode reward: [(0, '54.113')] -[2024-07-05 13:42:39,237][06965] Updated weights for policy 0, policy_version 43545 (0.0008) -[2024-07-05 13:42:40,924][06965] Updated weights for policy 0, policy_version 43555 (0.0007) -[2024-07-05 13:42:42,607][06965] Updated weights for policy 0, policy_version 43565 (0.0010) -[2024-07-05 13:42:43,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48605.8, 300 sec: 48513.3). Total num frames: 336936960. Throughput: 0: 12133.9. Samples: 9207872. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:42:43,849][03445] Avg episode reward: [(0, '55.976')] -[2024-07-05 13:42:44,305][06965] Updated weights for policy 0, policy_version 43575 (0.0008) -[2024-07-05 13:42:45,970][06965] Updated weights for policy 0, policy_version 43585 (0.0010) -[2024-07-05 13:42:47,627][06965] Updated weights for policy 0, policy_version 43595 (0.0007) -[2024-07-05 13:42:48,849][03445] Fps is (10 sec: 48332.3, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 337174528. Throughput: 0: 12135.7. Samples: 9281000. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:42:48,850][03445] Avg episode reward: [(0, '50.717')] -[2024-07-05 13:42:49,326][06965] Updated weights for policy 0, policy_version 43605 (0.0010) -[2024-07-05 13:42:51,041][06965] Updated weights for policy 0, policy_version 43615 (0.0008) -[2024-07-05 13:42:52,755][06965] Updated weights for policy 0, policy_version 43625 (0.0011) -[2024-07-05 13:42:53,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 337420288. Throughput: 0: 12129.3. Samples: 9353352. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:42:53,850][03445] Avg episode reward: [(0, '54.316')] -[2024-07-05 13:42:54,427][06965] Updated weights for policy 0, policy_version 43635 (0.0007) -[2024-07-05 13:42:56,148][06965] Updated weights for policy 0, policy_version 43645 (0.0008) -[2024-07-05 13:42:57,834][06965] Updated weights for policy 0, policy_version 43655 (0.0009) -[2024-07-05 13:42:58,849][03445] Fps is (10 sec: 49152.3, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 337666048. Throughput: 0: 12132.0. Samples: 9389884. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:42:58,850][03445] Avg episode reward: [(0, '52.665')] -[2024-07-05 13:42:59,502][06965] Updated weights for policy 0, policy_version 43665 (0.0007) -[2024-07-05 13:43:01,218][06965] Updated weights for policy 0, policy_version 43675 (0.0008) -[2024-07-05 13:43:02,881][06965] Updated weights for policy 0, policy_version 43685 (0.0007) -[2024-07-05 13:43:03,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 337903616. Throughput: 0: 12130.7. Samples: 9462572. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:43:03,850][03445] Avg episode reward: [(0, '52.453')] -[2024-07-05 13:43:04,568][06965] Updated weights for policy 0, policy_version 43695 (0.0009) -[2024-07-05 13:43:06,249][06965] Updated weights for policy 0, policy_version 43705 (0.0008) -[2024-07-05 13:43:07,945][06965] Updated weights for policy 0, policy_version 43715 (0.0008) -[2024-07-05 13:43:08,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 338149376. Throughput: 0: 12140.7. Samples: 9535468. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:43:08,849][03445] Avg episode reward: [(0, '53.995')] -[2024-07-05 13:43:09,637][06965] Updated weights for policy 0, policy_version 43725 (0.0007) -[2024-07-05 13:43:11,296][06965] Updated weights for policy 0, policy_version 43735 (0.0013) -[2024-07-05 13:43:12,980][06965] Updated weights for policy 0, policy_version 43745 (0.0008) -[2024-07-05 13:43:13,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 338395136. Throughput: 0: 12143.0. Samples: 9572112. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:43:13,850][03445] Avg episode reward: [(0, '53.906')] -[2024-07-05 13:43:14,649][06965] Updated weights for policy 0, policy_version 43755 (0.0008) -[2024-07-05 13:43:16,336][06965] Updated weights for policy 0, policy_version 43765 (0.0008) -[2024-07-05 13:43:18,007][06965] Updated weights for policy 0, policy_version 43775 (0.0009) -[2024-07-05 13:43:18,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 338632704. Throughput: 0: 12137.7. Samples: 9644784. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:43:18,850][03445] Avg episode reward: [(0, '53.827')] -[2024-07-05 13:43:19,706][06965] Updated weights for policy 0, policy_version 43785 (0.0008) -[2024-07-05 13:43:21,425][06965] Updated weights for policy 0, policy_version 43795 (0.0008) -[2024-07-05 13:43:23,115][06965] Updated weights for policy 0, policy_version 43805 (0.0008) -[2024-07-05 13:43:23,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 338878464. Throughput: 0: 12138.4. Samples: 9717772. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:43:23,850][03445] Avg episode reward: [(0, '51.488')] -[2024-07-05 13:43:24,796][06965] Updated weights for policy 0, policy_version 43815 (0.0007) -[2024-07-05 13:43:26,512][06965] Updated weights for policy 0, policy_version 43825 (0.0012) -[2024-07-05 13:43:28,221][06965] Updated weights for policy 0, policy_version 43835 (0.0007) -[2024-07-05 13:43:28,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 339116032. Throughput: 0: 12137.5. Samples: 9754060. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:43:28,850][03445] Avg episode reward: [(0, '52.679')] -[2024-07-05 13:43:29,906][06965] Updated weights for policy 0, policy_version 43845 (0.0011) -[2024-07-05 13:43:31,558][06965] Updated weights for policy 0, policy_version 43855 (0.0010) -[2024-07-05 13:43:33,247][06965] Updated weights for policy 0, policy_version 43865 (0.0007) -[2024-07-05 13:43:33,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 339361792. Throughput: 0: 12129.3. Samples: 9826820. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 13:43:33,849][03445] Avg episode reward: [(0, '54.621')] -[2024-07-05 13:43:34,934][06965] Updated weights for policy 0, policy_version 43875 (0.0008) -[2024-07-05 13:43:36,618][06965] Updated weights for policy 0, policy_version 43885 (0.0010) -[2024-07-05 13:43:38,346][06965] Updated weights for policy 0, policy_version 43895 (0.0007) -[2024-07-05 13:43:38,849][03445] Fps is (10 sec: 49151.9, 60 sec: 48605.8, 300 sec: 48541.1). Total num frames: 339607552. Throughput: 0: 12137.2. Samples: 9899528. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:43:38,850][03445] Avg episode reward: [(0, '54.125')] -[2024-07-05 13:43:39,985][06965] Updated weights for policy 0, policy_version 43905 (0.0010) -[2024-07-05 13:43:41,680][06965] Updated weights for policy 0, policy_version 43915 (0.0008) -[2024-07-05 13:43:43,379][06965] Updated weights for policy 0, policy_version 43925 (0.0008) -[2024-07-05 13:43:43,849][03445] Fps is (10 sec: 48332.4, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 339845120. Throughput: 0: 12130.2. Samples: 9935744. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:43:43,849][03445] Avg episode reward: [(0, '53.805')] -[2024-07-05 13:43:45,042][06965] Updated weights for policy 0, policy_version 43935 (0.0008) -[2024-07-05 13:43:46,756][06965] Updated weights for policy 0, policy_version 43945 (0.0009) -[2024-07-05 13:43:48,475][06965] Updated weights for policy 0, policy_version 43955 (0.0008) -[2024-07-05 13:43:48,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 340090880. Throughput: 0: 12134.3. Samples: 10008616. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:43:48,849][03445] Avg episode reward: [(0, '54.366')] -[2024-07-05 13:43:48,854][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000043957_340090880.pth... -[2024-07-05 13:43:48,920][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000042536_328450048.pth -[2024-07-05 13:43:50,169][06965] Updated weights for policy 0, policy_version 43965 (0.0008) -[2024-07-05 13:43:51,864][06965] Updated weights for policy 0, policy_version 43975 (0.0008) -[2024-07-05 13:43:53,556][06965] Updated weights for policy 0, policy_version 43985 (0.0008) -[2024-07-05 13:43:53,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 340328448. Throughput: 0: 12118.7. Samples: 10080808. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:43:53,849][03445] Avg episode reward: [(0, '53.797')] -[2024-07-05 13:43:55,258][06965] Updated weights for policy 0, policy_version 43995 (0.0009) -[2024-07-05 13:43:56,923][06965] Updated weights for policy 0, policy_version 44005 (0.0008) -[2024-07-05 13:43:58,623][06965] Updated weights for policy 0, policy_version 44015 (0.0008) -[2024-07-05 13:43:58,849][03445] Fps is (10 sec: 48332.4, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 340574208. Throughput: 0: 12119.8. Samples: 10117504. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:43:58,850][03445] Avg episode reward: [(0, '52.386')] -[2024-07-05 13:44:00,303][06965] Updated weights for policy 0, policy_version 44025 (0.0010) -[2024-07-05 13:44:02,040][06965] Updated weights for policy 0, policy_version 44035 (0.0008) -[2024-07-05 13:44:03,704][06965] Updated weights for policy 0, policy_version 44045 (0.0008) -[2024-07-05 13:44:03,849][03445] Fps is (10 sec: 48331.9, 60 sec: 48469.2, 300 sec: 48485.5). Total num frames: 340811776. Throughput: 0: 12116.3. Samples: 10190020. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:44:03,850][03445] Avg episode reward: [(0, '52.578')] -[2024-07-05 13:44:05,393][06965] Updated weights for policy 0, policy_version 44055 (0.0010) -[2024-07-05 13:44:07,045][06965] Updated weights for policy 0, policy_version 44065 (0.0007) -[2024-07-05 13:44:08,763][06965] Updated weights for policy 0, policy_version 44075 (0.0007) -[2024-07-05 13:44:08,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 341057536. Throughput: 0: 12110.6. Samples: 10262752. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:44:08,850][03445] Avg episode reward: [(0, '54.352')] -[2024-07-05 13:44:10,455][06965] Updated weights for policy 0, policy_version 44085 (0.0007) -[2024-07-05 13:44:12,168][06965] Updated weights for policy 0, policy_version 44095 (0.0009) -[2024-07-05 13:44:13,829][06965] Updated weights for policy 0, policy_version 44105 (0.0007) -[2024-07-05 13:44:13,849][03445] Fps is (10 sec: 49153.0, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 341303296. Throughput: 0: 12114.4. Samples: 10299208. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:44:13,850][03445] Avg episode reward: [(0, '54.216')] -[2024-07-05 13:44:15,515][06965] Updated weights for policy 0, policy_version 44115 (0.0008) -[2024-07-05 13:44:17,240][06965] Updated weights for policy 0, policy_version 44125 (0.0008) -[2024-07-05 13:44:18,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 341540864. Throughput: 0: 12106.0. Samples: 10371592. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:44:18,849][03445] Avg episode reward: [(0, '55.259')] -[2024-07-05 13:44:18,939][06965] Updated weights for policy 0, policy_version 44135 (0.0008) -[2024-07-05 13:44:20,563][06965] Updated weights for policy 0, policy_version 44145 (0.0007) -[2024-07-05 13:44:22,271][06965] Updated weights for policy 0, policy_version 44155 (0.0007) -[2024-07-05 13:44:23,849][03445] Fps is (10 sec: 48332.2, 60 sec: 48469.2, 300 sec: 48513.3). Total num frames: 341786624. Throughput: 0: 12116.2. Samples: 10444760. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:44:23,850][03445] Avg episode reward: [(0, '54.598')] -[2024-07-05 13:44:23,957][06965] Updated weights for policy 0, policy_version 44165 (0.0007) -[2024-07-05 13:44:25,631][06965] Updated weights for policy 0, policy_version 44175 (0.0008) -[2024-07-05 13:44:27,276][06965] Updated weights for policy 0, policy_version 44185 (0.0007) -[2024-07-05 13:44:28,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 342032384. Throughput: 0: 12122.8. Samples: 10481268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) -[2024-07-05 13:44:28,849][03445] Avg episode reward: [(0, '54.046')] -[2024-07-05 13:44:28,995][06965] Updated weights for policy 0, policy_version 44195 (0.0008) -[2024-07-05 13:44:30,708][06965] Updated weights for policy 0, policy_version 44205 (0.0010) -[2024-07-05 13:44:32,445][06965] Updated weights for policy 0, policy_version 44215 (0.0008) -[2024-07-05 13:44:33,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 342269952. Throughput: 0: 12117.2. Samples: 10553892. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:44:33,850][03445] Avg episode reward: [(0, '54.777')] -[2024-07-05 13:44:34,069][06965] Updated weights for policy 0, policy_version 44225 (0.0008) -[2024-07-05 13:44:35,782][06965] Updated weights for policy 0, policy_version 44235 (0.0008) -[2024-07-05 13:44:37,481][06965] Updated weights for policy 0, policy_version 44245 (0.0007) -[2024-07-05 13:44:38,849][03445] Fps is (10 sec: 48332.6, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 342515712. Throughput: 0: 12135.2. Samples: 10626892. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:44:38,850][03445] Avg episode reward: [(0, '52.784')] -[2024-07-05 13:44:39,142][06965] Updated weights for policy 0, policy_version 44255 (0.0007) -[2024-07-05 13:44:40,824][06965] Updated weights for policy 0, policy_version 44265 (0.0007) -[2024-07-05 13:44:42,513][06965] Updated weights for policy 0, policy_version 44275 (0.0008) -[2024-07-05 13:44:43,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.4, 300 sec: 48485.5). Total num frames: 342753280. Throughput: 0: 12128.4. Samples: 10663284. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:44:43,850][03445] Avg episode reward: [(0, '49.904')] -[2024-07-05 13:44:44,198][06965] Updated weights for policy 0, policy_version 44285 (0.0011) -[2024-07-05 13:44:45,901][06965] Updated weights for policy 0, policy_version 44295 (0.0008) -[2024-07-05 13:44:47,586][06965] Updated weights for policy 0, policy_version 44305 (0.0008) -[2024-07-05 13:44:48,849][03445] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 342999040. Throughput: 0: 12134.0. Samples: 10736048. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:44:48,849][03445] Avg episode reward: [(0, '51.039')] -[2024-07-05 13:44:49,273][06965] Updated weights for policy 0, policy_version 44315 (0.0009) -[2024-07-05 13:44:50,971][06965] Updated weights for policy 0, policy_version 44325 (0.0008) -[2024-07-05 13:44:52,649][06965] Updated weights for policy 0, policy_version 44335 (0.0008) -[2024-07-05 13:44:53,849][03445] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 343244800. Throughput: 0: 12134.8. Samples: 10808816. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 13:44:53,849][03445] Avg episode reward: [(0, '53.743')] -[2024-07-05 13:44:54,338][06965] Updated weights for policy 0, policy_version 44345 (0.0008) -[2024-07-05 13:44:56,017][06965] Updated weights for policy 0, policy_version 44355 (0.0007) -[2024-07-05 13:44:57,657][06965] Updated weights for policy 0, policy_version 44365 (0.0010) -[2024-07-05 13:44:58,848][03445] Fps is (10 sec: 48332.9, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 343482368. Throughput: 0: 12134.2. Samples: 10845248. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:44:58,849][03445] Avg episode reward: [(0, '53.178')] -[2024-07-05 13:44:59,361][06965] Updated weights for policy 0, policy_version 44375 (0.0010) -[2024-07-05 13:45:01,094][06965] Updated weights for policy 0, policy_version 44385 (0.0008) -[2024-07-05 13:45:02,769][06965] Updated weights for policy 0, policy_version 44395 (0.0010) -[2024-07-05 13:45:03,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48606.0, 300 sec: 48513.3). Total num frames: 343728128. Throughput: 0: 12144.2. Samples: 10918080. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:03,850][03445] Avg episode reward: [(0, '50.733')] -[2024-07-05 13:45:04,415][06965] Updated weights for policy 0, policy_version 44405 (0.0007) -[2024-07-05 13:45:06,140][06965] Updated weights for policy 0, policy_version 44415 (0.0008) -[2024-07-05 13:45:07,801][06965] Updated weights for policy 0, policy_version 44425 (0.0008) -[2024-07-05 13:45:08,849][03445] Fps is (10 sec: 49151.9, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 343973888. Throughput: 0: 12138.5. Samples: 10990992. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:08,850][03445] Avg episode reward: [(0, '55.420')] -[2024-07-05 13:45:09,508][06965] Updated weights for policy 0, policy_version 44435 (0.0008) -[2024-07-05 13:45:11,181][06965] Updated weights for policy 0, policy_version 44445 (0.0007) -[2024-07-05 13:45:12,861][06965] Updated weights for policy 0, policy_version 44455 (0.0008) -[2024-07-05 13:45:13,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 344211456. Throughput: 0: 12135.0. Samples: 11027344. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:13,850][03445] Avg episode reward: [(0, '52.915')] -[2024-07-05 13:45:14,574][06965] Updated weights for policy 0, policy_version 44465 (0.0010) -[2024-07-05 13:45:16,310][06965] Updated weights for policy 0, policy_version 44475 (0.0008) -[2024-07-05 13:45:17,994][06965] Updated weights for policy 0, policy_version 44485 (0.0008) -[2024-07-05 13:45:18,849][03445] Fps is (10 sec: 48332.2, 60 sec: 48605.8, 300 sec: 48513.3). Total num frames: 344457216. Throughput: 0: 12137.6. Samples: 11100084. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:18,850][03445] Avg episode reward: [(0, '53.242')] -[2024-07-05 13:45:19,669][06965] Updated weights for policy 0, policy_version 44495 (0.0008) -[2024-07-05 13:45:21,376][06965] Updated weights for policy 0, policy_version 44505 (0.0008) -[2024-07-05 13:45:23,054][06965] Updated weights for policy 0, policy_version 44515 (0.0008) -[2024-07-05 13:45:23,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 344694784. Throughput: 0: 12126.0. Samples: 11172560. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:23,850][03445] Avg episode reward: [(0, '53.383')] -[2024-07-05 13:45:24,723][06965] Updated weights for policy 0, policy_version 44525 (0.0007) -[2024-07-05 13:45:26,400][06965] Updated weights for policy 0, policy_version 44535 (0.0008) -[2024-07-05 13:45:28,123][06965] Updated weights for policy 0, policy_version 44545 (0.0008) -[2024-07-05 13:45:28,849][03445] Fps is (10 sec: 48332.5, 60 sec: 48469.2, 300 sec: 48513.3). Total num frames: 344940544. Throughput: 0: 12127.1. Samples: 11209004. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:28,850][03445] Avg episode reward: [(0, '52.923')] -[2024-07-05 13:45:29,824][06965] Updated weights for policy 0, policy_version 44555 (0.0010) -[2024-07-05 13:45:31,499][06965] Updated weights for policy 0, policy_version 44565 (0.0007) -[2024-07-05 13:45:33,173][06965] Updated weights for policy 0, policy_version 44575 (0.0011) -[2024-07-05 13:45:33,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 345178112. Throughput: 0: 12123.5. Samples: 11281604. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:33,850][03445] Avg episode reward: [(0, '53.933')] -[2024-07-05 13:45:34,874][06965] Updated weights for policy 0, policy_version 44585 (0.0008) -[2024-07-05 13:45:36,597][06965] Updated weights for policy 0, policy_version 44595 (0.0010) -[2024-07-05 13:45:38,275][06965] Updated weights for policy 0, policy_version 44605 (0.0008) -[2024-07-05 13:45:38,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 345423872. Throughput: 0: 12119.9. Samples: 11354212. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:38,850][03445] Avg episode reward: [(0, '51.026')] -[2024-07-05 13:45:39,937][06965] Updated weights for policy 0, policy_version 44615 (0.0009) -[2024-07-05 13:45:41,650][06965] Updated weights for policy 0, policy_version 44625 (0.0007) -[2024-07-05 13:45:43,326][06965] Updated weights for policy 0, policy_version 44635 (0.0008) -[2024-07-05 13:45:43,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 345661440. Throughput: 0: 12117.9. Samples: 11390556. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:43,850][03445] Avg episode reward: [(0, '52.057')] -[2024-07-05 13:45:45,013][06965] Updated weights for policy 0, policy_version 44645 (0.0009) -[2024-07-05 13:45:46,701][06965] Updated weights for policy 0, policy_version 44655 (0.0008) -[2024-07-05 13:45:48,385][06965] Updated weights for policy 0, policy_version 44665 (0.0008) -[2024-07-05 13:45:48,849][03445] Fps is (10 sec: 48333.3, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 345907200. Throughput: 0: 12115.3. Samples: 11463268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:45:48,850][03445] Avg episode reward: [(0, '53.535')] -[2024-07-05 13:45:48,853][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000044667_345907200.pth... -[2024-07-05 13:45:48,923][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000043246_334266368.pth -[2024-07-05 13:45:50,118][06965] Updated weights for policy 0, policy_version 44675 (0.0007) -[2024-07-05 13:45:51,842][06965] Updated weights for policy 0, policy_version 44685 (0.0013) -[2024-07-05 13:45:53,512][06965] Updated weights for policy 0, policy_version 44695 (0.0010) -[2024-07-05 13:45:53,849][03445] Fps is (10 sec: 49151.8, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 346152960. Throughput: 0: 12100.0. Samples: 11535492. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:45:53,850][03445] Avg episode reward: [(0, '54.268')] -[2024-07-05 13:45:55,208][06965] Updated weights for policy 0, policy_version 44705 (0.0008) -[2024-07-05 13:45:56,915][06965] Updated weights for policy 0, policy_version 44715 (0.0007) -[2024-07-05 13:45:58,640][06965] Updated weights for policy 0, policy_version 44725 (0.0009) -[2024-07-05 13:45:58,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 346390528. Throughput: 0: 12098.2. Samples: 11571764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:45:58,850][03445] Avg episode reward: [(0, '50.892')] -[2024-07-05 13:46:00,273][06965] Updated weights for policy 0, policy_version 44735 (0.0008) -[2024-07-05 13:46:01,951][06965] Updated weights for policy 0, policy_version 44745 (0.0008) -[2024-07-05 13:46:03,658][06965] Updated weights for policy 0, policy_version 44755 (0.0008) -[2024-07-05 13:46:03,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 346636288. Throughput: 0: 12099.0. Samples: 11644536. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:03,850][03445] Avg episode reward: [(0, '53.515')] -[2024-07-05 13:46:05,348][06965] Updated weights for policy 0, policy_version 44765 (0.0007) -[2024-07-05 13:46:07,050][06965] Updated weights for policy 0, policy_version 44775 (0.0007) -[2024-07-05 13:46:08,713][06965] Updated weights for policy 0, policy_version 44785 (0.0008) -[2024-07-05 13:46:08,849][03445] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48485.5). Total num frames: 346873856. Throughput: 0: 12103.4. Samples: 11717212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:08,849][03445] Avg episode reward: [(0, '55.426')] -[2024-07-05 13:46:10,394][06965] Updated weights for policy 0, policy_version 44795 (0.0008) -[2024-07-05 13:46:12,103][06965] Updated weights for policy 0, policy_version 44805 (0.0011) -[2024-07-05 13:46:13,784][06965] Updated weights for policy 0, policy_version 44815 (0.0008) -[2024-07-05 13:46:13,849][03445] Fps is (10 sec: 48332.2, 60 sec: 48469.2, 300 sec: 48513.3). Total num frames: 347119616. Throughput: 0: 12103.8. Samples: 11753676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:13,850][03445] Avg episode reward: [(0, '51.834')] -[2024-07-05 13:46:15,461][06965] Updated weights for policy 0, policy_version 44825 (0.0008) -[2024-07-05 13:46:17,177][06965] Updated weights for policy 0, policy_version 44835 (0.0010) -[2024-07-05 13:46:18,844][06965] Updated weights for policy 0, policy_version 44845 (0.0007) -[2024-07-05 13:46:18,848][03445] Fps is (10 sec: 49152.3, 60 sec: 48469.5, 300 sec: 48513.3). Total num frames: 347365376. Throughput: 0: 12113.4. Samples: 11826704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:18,850][03445] Avg episode reward: [(0, '54.949')] -[2024-07-05 13:46:20,562][06965] Updated weights for policy 0, policy_version 44855 (0.0007) -[2024-07-05 13:46:22,276][06965] Updated weights for policy 0, policy_version 44865 (0.0008) -[2024-07-05 13:46:23,849][03445] Fps is (10 sec: 48333.1, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 347602944. Throughput: 0: 12106.1. Samples: 11898984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:23,850][03445] Avg episode reward: [(0, '52.774')] -[2024-07-05 13:46:23,973][06965] Updated weights for policy 0, policy_version 44875 (0.0007) -[2024-07-05 13:46:25,650][06965] Updated weights for policy 0, policy_version 44885 (0.0008) -[2024-07-05 13:46:27,314][06965] Updated weights for policy 0, policy_version 44895 (0.0009) -[2024-07-05 13:46:28,849][03445] Fps is (10 sec: 48332.4, 60 sec: 48469.5, 300 sec: 48513.3). Total num frames: 347848704. Throughput: 0: 12114.7. Samples: 11935716. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:28,850][03445] Avg episode reward: [(0, '53.900')] -[2024-07-05 13:46:29,009][06965] Updated weights for policy 0, policy_version 44905 (0.0008) -[2024-07-05 13:46:30,684][06965] Updated weights for policy 0, policy_version 44915 (0.0008) -[2024-07-05 13:46:32,360][06965] Updated weights for policy 0, policy_version 44925 (0.0008) -[2024-07-05 13:46:33,849][03445] Fps is (10 sec: 48332.7, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 348086272. Throughput: 0: 12101.2. Samples: 12007824. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:33,850][03445] Avg episode reward: [(0, '54.053')] -[2024-07-05 13:46:34,095][06965] Updated weights for policy 0, policy_version 44935 (0.0010) -[2024-07-05 13:46:35,791][06965] Updated weights for policy 0, policy_version 44945 (0.0008) -[2024-07-05 13:46:37,503][06965] Updated weights for policy 0, policy_version 44955 (0.0008) -[2024-07-05 13:46:38,849][03445] Fps is (10 sec: 47513.3, 60 sec: 48332.8, 300 sec: 48485.5). Total num frames: 348323840. Throughput: 0: 12109.1. Samples: 12080400. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:38,850][03445] Avg episode reward: [(0, '54.133')] -[2024-07-05 13:46:39,216][06965] Updated weights for policy 0, policy_version 44965 (0.0008) -[2024-07-05 13:46:40,862][06965] Updated weights for policy 0, policy_version 44975 (0.0008) -[2024-07-05 13:46:42,658][06965] Updated weights for policy 0, policy_version 44985 (0.0008) -[2024-07-05 13:46:43,849][03445] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 348569600. Throughput: 0: 12103.1. Samples: 12116404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:43,849][03445] Avg episode reward: [(0, '54.624')] -[2024-07-05 13:46:44,353][06965] Updated weights for policy 0, policy_version 44995 (0.0009) -[2024-07-05 13:46:46,014][06965] Updated weights for policy 0, policy_version 45005 (0.0007) -[2024-07-05 13:46:47,646][06965] Updated weights for policy 0, policy_version 45015 (0.0007) -[2024-07-05 13:46:48,849][03445] Fps is (10 sec: 49152.1, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 348815360. Throughput: 0: 12102.4. Samples: 12189144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:46:48,850][03445] Avg episode reward: [(0, '54.617')] -[2024-07-05 13:46:49,342][06965] Updated weights for policy 0, policy_version 45025 (0.0007) -[2024-07-05 13:46:50,971][06965] Updated weights for policy 0, policy_version 45035 (0.0011) -[2024-07-05 13:46:52,687][06965] Updated weights for policy 0, policy_version 45045 (0.0008) -[2024-07-05 13:46:53,849][03445] Fps is (10 sec: 49151.8, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 349061120. Throughput: 0: 12119.7. Samples: 12262600. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:46:53,850][03445] Avg episode reward: [(0, '54.051')] -[2024-07-05 13:46:54,353][06965] Updated weights for policy 0, policy_version 45055 (0.0009) -[2024-07-05 13:46:56,011][06965] Updated weights for policy 0, policy_version 45065 (0.0008) -[2024-07-05 13:46:57,693][06965] Updated weights for policy 0, policy_version 45075 (0.0008) -[2024-07-05 13:46:58,849][03445] Fps is (10 sec: 48333.2, 60 sec: 48469.4, 300 sec: 48485.6). Total num frames: 349298688. Throughput: 0: 12120.4. Samples: 12299092. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:46:58,849][03445] Avg episode reward: [(0, '55.527')] -[2024-07-05 13:46:59,391][06965] Updated weights for policy 0, policy_version 45085 (0.0010) -[2024-07-05 13:47:01,091][06965] Updated weights for policy 0, policy_version 45095 (0.0008) -[2024-07-05 13:47:02,757][06965] Updated weights for policy 0, policy_version 45105 (0.0008) -[2024-07-05 13:47:03,849][03445] Fps is (10 sec: 48331.7, 60 sec: 48469.1, 300 sec: 48485.5). Total num frames: 349544448. Throughput: 0: 12129.1. Samples: 12372516. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:47:03,850][03445] Avg episode reward: [(0, '52.984')] -[2024-07-05 13:47:04,419][06965] Updated weights for policy 0, policy_version 45115 (0.0007) -[2024-07-05 13:47:06,068][06965] Updated weights for policy 0, policy_version 45125 (0.0008) -[2024-07-05 13:47:07,757][06965] Updated weights for policy 0, policy_version 45135 (0.0009) -[2024-07-05 13:47:08,849][03445] Fps is (10 sec: 49151.8, 60 sec: 48605.9, 300 sec: 48513.3). Total num frames: 349790208. Throughput: 0: 12157.0. Samples: 12446048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:47:08,850][03445] Avg episode reward: [(0, '53.437')] -[2024-07-05 13:47:09,445][06965] Updated weights for policy 0, policy_version 45145 (0.0007) -[2024-07-05 13:47:11,105][06965] Updated weights for policy 0, policy_version 45155 (0.0010) -[2024-07-05 13:47:12,805][06965] Updated weights for policy 0, policy_version 45165 (0.0007) -[2024-07-05 13:47:13,251][06945] Stopping Batcher_0... -[2024-07-05 13:47:13,252][06945] Loop batcher_evt_loop terminating... -[2024-07-05 13:47:13,252][03445] Component Batcher_0 stopped! -[2024-07-05 13:47:13,255][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045168_350011392.pth... -[2024-07-05 13:47:13,281][03445] Component RolloutWorker_w2 stopped! -[2024-07-05 13:47:13,282][06995] Stopping RolloutWorker_w12... -[2024-07-05 13:47:13,282][06995] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 13:47:13,282][03445] Component RolloutWorker_w12 stopped! -[2024-07-05 13:47:13,282][06967] Stopping RolloutWorker_w2... -[2024-07-05 13:47:13,282][03445] Component RolloutWorker_w1 stopped! -[2024-07-05 13:47:13,283][06968] Stopping RolloutWorker_w1... -[2024-07-05 13:47:13,283][06975] Stopping RolloutWorker_w9... -[2024-07-05 13:47:13,283][06968] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 13:47:13,283][06967] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 13:47:13,283][03445] Component RolloutWorker_w9 stopped! -[2024-07-05 13:47:13,283][06975] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 13:47:13,285][06992] Stopping RolloutWorker_w10... -[2024-07-05 13:47:13,285][06993] Stopping RolloutWorker_w13... -[2024-07-05 13:47:13,285][06994] Stopping RolloutWorker_w14... -[2024-07-05 13:47:13,285][06996] Stopping RolloutWorker_w15... -[2024-07-05 13:47:13,286][06993] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 13:47:13,286][06994] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 13:47:13,286][06996] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 13:47:13,286][06992] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 13:47:13,285][03445] Component RolloutWorker_w10 stopped! -[2024-07-05 13:47:13,286][06991] Stopping RolloutWorker_w11... -[2024-07-05 13:47:13,287][06991] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 13:47:13,286][03445] Component RolloutWorker_w13 stopped! -[2024-07-05 13:47:13,287][03445] Component RolloutWorker_w15 stopped! -[2024-07-05 13:47:13,288][06971] Stopping RolloutWorker_w5... -[2024-07-05 13:47:13,288][06971] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 13:47:13,288][03445] Component RolloutWorker_w14 stopped! -[2024-07-05 13:47:13,288][06972] Stopping RolloutWorker_w6... -[2024-07-05 13:47:13,288][03445] Component RolloutWorker_w11 stopped! -[2024-07-05 13:47:13,289][03445] Component RolloutWorker_w5 stopped! -[2024-07-05 13:47:13,290][03445] Component RolloutWorker_w6 stopped! -[2024-07-05 13:47:13,290][06972] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 13:47:13,291][06966] Stopping RolloutWorker_w0... -[2024-07-05 13:47:13,292][06966] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 13:47:13,291][03445] Component RolloutWorker_w0 stopped! -[2024-07-05 13:47:13,292][06969] Stopping RolloutWorker_w4... -[2024-07-05 13:47:13,292][06969] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 13:47:13,292][03445] Component RolloutWorker_w4 stopped! -[2024-07-05 13:47:13,308][03445] Component RolloutWorker_w8 stopped! -[2024-07-05 13:47:13,306][06974] Stopping RolloutWorker_w8... -[2024-07-05 13:47:13,312][06965] Weights refcount: 2 0 -[2024-07-05 13:47:13,312][06974] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 13:47:13,312][03445] Component RolloutWorker_w3 stopped! -[2024-07-05 13:47:13,313][06970] Stopping RolloutWorker_w3... -[2024-07-05 13:47:13,313][06970] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 13:47:13,321][06965] Stopping InferenceWorker_p0-w0... -[2024-07-05 13:47:13,321][06965] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 13:47:13,321][03445] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 13:47:13,331][06973] Stopping RolloutWorker_w7... -[2024-07-05 13:47:13,332][06973] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 13:47:13,331][03445] Component RolloutWorker_w7 stopped! -[2024-07-05 13:47:13,352][06945] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000043957_340090880.pth -[2024-07-05 13:47:13,361][06945] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045168_350011392.pth... -[2024-07-05 13:47:13,474][06945] Stopping LearnerWorker_p0... -[2024-07-05 13:47:13,477][06945] Loop learner_proc0_evt_loop terminating... -[2024-07-05 13:47:13,480][03445] Component LearnerWorker_p0 stopped! -[2024-07-05 13:47:13,481][03445] Waiting for process learner_proc0 to stop... -[2024-07-05 13:47:14,774][03445] Waiting for process inference_proc0-0 to join... -[2024-07-05 13:47:14,775][03445] Waiting for process rollout_proc0 to join... -[2024-07-05 13:47:14,776][03445] Waiting for process rollout_proc1 to join... -[2024-07-05 13:47:14,776][03445] Waiting for process rollout_proc2 to join... -[2024-07-05 13:47:14,776][03445] Waiting for process rollout_proc3 to join... -[2024-07-05 13:47:14,777][03445] Waiting for process rollout_proc4 to join... -[2024-07-05 13:47:14,777][03445] Waiting for process rollout_proc5 to join... -[2024-07-05 13:47:14,777][03445] Waiting for process rollout_proc6 to join... -[2024-07-05 13:47:14,778][03445] Waiting for process rollout_proc7 to join... -[2024-07-05 13:47:14,805][03445] Waiting for process rollout_proc8 to join... -[2024-07-05 13:47:14,806][03445] Waiting for process rollout_proc9 to join... -[2024-07-05 13:47:14,806][03445] Waiting for process rollout_proc10 to join... -[2024-07-05 13:47:14,807][03445] Waiting for process rollout_proc11 to join... -[2024-07-05 13:47:14,807][03445] Waiting for process rollout_proc12 to join... -[2024-07-05 13:47:14,808][03445] Waiting for process rollout_proc13 to join... -[2024-07-05 13:47:14,808][03445] Waiting for process rollout_proc14 to join... -[2024-07-05 13:47:14,808][03445] Waiting for process rollout_proc15 to join... -[2024-07-05 13:47:14,809][03445] Batcher 0 profile tree view: -batching: 72.9419, releasing_batches: 0.1442 -[2024-07-05 13:47:14,809][03445] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0001 - wait_policy_total: 49.6479 -update_model: 15.7902 - weight_update: 0.0007 -one_step: 0.0031 - handle_policy_step: 937.0904 - deserialize: 72.2385, stack: 5.1411, obs_to_device_normalize: 223.7636, forward: 436.2654, send_messages: 47.6115 - prepare_outputs: 120.0958 - to_cpu: 72.9398 -[2024-07-05 13:47:14,810][03445] Learner 0 profile tree view: -misc: 0.0292, prepare_batch: 99.1240 -train: 215.8834 - epoch_init: 0.0193, minibatch_init: 0.0265, losses_postprocess: 1.3278, kl_divergence: 1.4017, after_optimizer: 0.9846 - calculate_losses: 76.0781 - losses_init: 0.0106, forward_head: 3.0896, bptt_initial: 59.8919, tail: 2.6914, advantages_returns: 0.7388, losses: 4.2373 - bptt: 4.5926 - bptt_forward_core: 4.3832 - update: 134.0814 - clip: 3.9925 -[2024-07-05 13:47:14,810][03445] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.4464, enqueue_policy_requests: 30.4462, env_step: 489.4628, overhead: 50.1318, complete_rollouts: 1.0558 -save_policy_outputs: 36.9287 - split_output_tensors: 17.3248 -[2024-07-05 13:47:14,810][03445] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.4912, enqueue_policy_requests: 31.4885, env_step: 500.8608, overhead: 51.9969, complete_rollouts: 1.0848 -save_policy_outputs: 37.1938 - split_output_tensors: 17.2550 -[2024-07-05 13:47:14,811][03445] Loop Runner_EvtLoop terminating... -[2024-07-05 13:47:14,811][03445] Runner profile tree view: -main_loop: 1043.3001 -[2024-07-05 13:47:14,812][03445] Collected {0: 350011392}, FPS: 47920.8 -[2024-07-05 13:50:15,236][03445] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 13:50:15,237][03445] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 13:50:15,238][03445] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 13:50:15,238][03445] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 13:50:15,238][03445] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 13:50:15,239][03445] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 13:50:15,239][03445] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 13:50:15,239][03445] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 13:50:15,239][03445] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 13:50:15,240][03445] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 13:50:15,240][03445] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 13:50:15,240][03445] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 13:50:15,241][03445] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 13:50:15,241][03445] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 13:50:15,241][03445] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 13:50:15,261][03445] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:50:15,262][03445] RunningMeanStd input shape: (1,) -[2024-07-05 13:50:15,272][03445] ConvEncoder: input_channels=3 -[2024-07-05 13:50:15,299][03445] Conv encoder output size: 512 -[2024-07-05 13:50:15,299][03445] Policy head output size: 512 -[2024-07-05 13:50:15,318][03445] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045168_350011392.pth... -[2024-07-05 13:50:15,973][03445] Num frames 100... -[2024-07-05 13:50:16,053][03445] Num frames 200... -[2024-07-05 13:50:16,130][03445] Num frames 300... -[2024-07-05 13:50:16,208][03445] Num frames 400... -[2024-07-05 13:50:16,291][03445] Num frames 500... -[2024-07-05 13:50:16,372][03445] Num frames 600... -[2024-07-05 13:50:16,452][03445] Num frames 700... -[2024-07-05 13:50:16,516][03445] Num frames 800... -[2024-07-05 13:50:16,579][03445] Num frames 900... -[2024-07-05 13:50:16,641][03445] Num frames 1000... -[2024-07-05 13:50:16,705][03445] Num frames 1100... -[2024-07-05 13:50:16,767][03445] Num frames 1200... -[2024-07-05 13:50:16,827][03445] Num frames 1300... -[2024-07-05 13:50:16,886][03445] Num frames 1400... -[2024-07-05 13:50:16,946][03445] Num frames 1500... -[2024-07-05 13:50:17,007][03445] Num frames 1600... -[2024-07-05 13:50:17,066][03445] Num frames 1700... -[2024-07-05 13:50:17,126][03445] Num frames 1800... -[2024-07-05 13:50:17,187][03445] Num frames 1900... -[2024-07-05 13:50:17,249][03445] Num frames 2000... -[2024-07-05 13:50:17,313][03445] Avg episode rewards: #0: 52.169, true rewards: #0: 20.170 -[2024-07-05 13:50:17,314][03445] Avg episode reward: 52.169, avg true_objective: 20.170 -[2024-07-05 13:50:17,363][03445] Num frames 2100... -[2024-07-05 13:50:17,423][03445] Num frames 2200... -[2024-07-05 13:50:17,484][03445] Num frames 2300... -[2024-07-05 13:50:17,546][03445] Num frames 2400... -[2024-07-05 13:50:17,604][03445] Num frames 2500... -[2024-07-05 13:50:17,664][03445] Num frames 2600... -[2024-07-05 13:50:17,725][03445] Num frames 2700... -[2024-07-05 13:50:17,786][03445] Num frames 2800... -[2024-07-05 13:50:17,851][03445] Num frames 2900... -[2024-07-05 13:50:17,932][03445] Num frames 3000... -[2024-07-05 13:50:18,025][03445] Num frames 3100... -[2024-07-05 13:50:18,108][03445] Num frames 3200... -[2024-07-05 13:50:18,169][03445] Num frames 3300... -[2024-07-05 13:50:18,230][03445] Num frames 3400... -[2024-07-05 13:50:18,292][03445] Num frames 3500... -[2024-07-05 13:50:18,352][03445] Num frames 3600... -[2024-07-05 13:50:18,415][03445] Num frames 3700... -[2024-07-05 13:50:18,488][03445] Num frames 3800... -[2024-07-05 13:50:18,550][03445] Num frames 3900... -[2024-07-05 13:50:18,616][03445] Num frames 4000... -[2024-07-05 13:50:18,677][03445] Num frames 4100... -[2024-07-05 13:50:18,740][03445] Avg episode rewards: #0: 56.084, true rewards: #0: 20.585 -[2024-07-05 13:50:18,742][03445] Avg episode reward: 56.084, avg true_objective: 20.585 -[2024-07-05 13:50:18,794][03445] Num frames 4200... -[2024-07-05 13:50:18,854][03445] Num frames 4300... -[2024-07-05 13:50:18,916][03445] Num frames 4400... -[2024-07-05 13:50:18,976][03445] Num frames 4500... -[2024-07-05 13:50:19,035][03445] Num frames 4600... -[2024-07-05 13:50:19,093][03445] Num frames 4700... -[2024-07-05 13:50:19,153][03445] Num frames 4800... -[2024-07-05 13:50:19,215][03445] Num frames 4900... -[2024-07-05 13:50:19,274][03445] Num frames 5000... -[2024-07-05 13:50:19,335][03445] Num frames 5100... -[2024-07-05 13:50:19,394][03445] Num frames 5200... -[2024-07-05 13:50:19,451][03445] Num frames 5300... -[2024-07-05 13:50:19,511][03445] Num frames 5400... -[2024-07-05 13:50:19,570][03445] Num frames 5500... -[2024-07-05 13:50:19,630][03445] Num frames 5600... -[2024-07-05 13:50:19,690][03445] Num frames 5700... -[2024-07-05 13:50:19,750][03445] Num frames 5800... -[2024-07-05 13:50:19,808][03445] Num frames 5900... -[2024-07-05 13:50:19,868][03445] Num frames 6000... -[2024-07-05 13:50:19,926][03445] Num frames 6100... -[2024-07-05 13:50:19,985][03445] Num frames 6200... -[2024-07-05 13:50:20,050][03445] Avg episode rewards: #0: 58.722, true rewards: #0: 20.723 -[2024-07-05 13:50:20,052][03445] Avg episode reward: 58.722, avg true_objective: 20.723 -[2024-07-05 13:50:20,104][03445] Num frames 6300... -[2024-07-05 13:50:20,164][03445] Num frames 6400... -[2024-07-05 13:50:20,224][03445] Num frames 6500... -[2024-07-05 13:50:20,287][03445] Num frames 6600... -[2024-07-05 13:50:20,347][03445] Num frames 6700... -[2024-07-05 13:50:20,407][03445] Num frames 6800... -[2024-07-05 13:50:20,470][03445] Num frames 6900... -[2024-07-05 13:50:20,529][03445] Num frames 7000... -[2024-07-05 13:50:20,588][03445] Num frames 7100... -[2024-07-05 13:50:20,646][03445] Num frames 7200... -[2024-07-05 13:50:20,704][03445] Num frames 7300... -[2024-07-05 13:50:20,764][03445] Num frames 7400... -[2024-07-05 13:50:20,834][03445] Num frames 7500... -[2024-07-05 13:50:20,895][03445] Num frames 7600... -[2024-07-05 13:50:20,958][03445] Num frames 7700... -[2024-07-05 13:50:21,017][03445] Num frames 7800... -[2024-07-05 13:50:21,076][03445] Num frames 7900... -[2024-07-05 13:50:21,138][03445] Num frames 8000... -[2024-07-05 13:50:21,199][03445] Num frames 8100... -[2024-07-05 13:50:21,260][03445] Num frames 8200... -[2024-07-05 13:50:21,320][03445] Num frames 8300... -[2024-07-05 13:50:21,384][03445] Avg episode rewards: #0: 60.291, true rewards: #0: 20.793 -[2024-07-05 13:50:21,386][03445] Avg episode reward: 60.291, avg true_objective: 20.793 -[2024-07-05 13:50:21,435][03445] Num frames 8400... -[2024-07-05 13:50:21,494][03445] Num frames 8500... -[2024-07-05 13:50:21,557][03445] Num frames 8600... -[2024-07-05 13:50:21,615][03445] Num frames 8700... -[2024-07-05 13:50:21,676][03445] Num frames 8800... -[2024-07-05 13:50:21,735][03445] Num frames 8900... -[2024-07-05 13:50:21,793][03445] Num frames 9000... -[2024-07-05 13:50:21,854][03445] Num frames 9100... -[2024-07-05 13:50:21,913][03445] Num frames 9200... -[2024-07-05 13:50:21,972][03445] Num frames 9300... -[2024-07-05 13:50:22,032][03445] Num frames 9400... -[2024-07-05 13:50:22,093][03445] Num frames 9500... -[2024-07-05 13:50:22,156][03445] Num frames 9600... -[2024-07-05 13:50:22,216][03445] Num frames 9700... -[2024-07-05 13:50:22,278][03445] Num frames 9800... -[2024-07-05 13:50:22,337][03445] Num frames 9900... -[2024-07-05 13:50:22,394][03445] Num frames 10000... -[2024-07-05 13:50:22,453][03445] Num frames 10100... -[2024-07-05 13:50:22,511][03445] Num frames 10200... -[2024-07-05 13:50:22,570][03445] Num frames 10300... -[2024-07-05 13:50:22,630][03445] Num frames 10400... -[2024-07-05 13:50:22,694][03445] Avg episode rewards: #0: 61.233, true rewards: #0: 20.834 -[2024-07-05 13:50:22,695][03445] Avg episode reward: 61.233, avg true_objective: 20.834 -[2024-07-05 13:50:22,748][03445] Num frames 10500... -[2024-07-05 13:50:22,807][03445] Num frames 10600... -[2024-07-05 13:50:22,866][03445] Num frames 10700... -[2024-07-05 13:50:22,926][03445] Num frames 10800... -[2024-07-05 13:50:22,985][03445] Num frames 10900... -[2024-07-05 13:50:23,046][03445] Num frames 11000... -[2024-07-05 13:50:23,105][03445] Num frames 11100... -[2024-07-05 13:50:23,163][03445] Num frames 11200... -[2024-07-05 13:50:23,222][03445] Num frames 11300... -[2024-07-05 13:50:23,281][03445] Num frames 11400... -[2024-07-05 13:50:23,342][03445] Num frames 11500... -[2024-07-05 13:50:23,405][03445] Num frames 11600... -[2024-07-05 13:50:23,465][03445] Num frames 11700... -[2024-07-05 13:50:23,526][03445] Num frames 11800... -[2024-07-05 13:50:23,588][03445] Num frames 11900... -[2024-07-05 13:50:23,648][03445] Num frames 12000... -[2024-07-05 13:50:23,707][03445] Num frames 12100... -[2024-07-05 13:50:23,778][03445] Num frames 12200... -[2024-07-05 13:50:23,839][03445] Num frames 12300... -[2024-07-05 13:50:23,904][03445] Num frames 12400... -[2024-07-05 13:50:23,963][03445] Num frames 12500... -[2024-07-05 13:50:24,028][03445] Avg episode rewards: #0: 61.360, true rewards: #0: 20.862 -[2024-07-05 13:50:24,029][03445] Avg episode reward: 61.360, avg true_objective: 20.862 -[2024-07-05 13:50:24,080][03445] Num frames 12600... -[2024-07-05 13:50:24,140][03445] Num frames 12700... -[2024-07-05 13:50:24,197][03445] Num frames 12800... -[2024-07-05 13:50:24,255][03445] Num frames 12900... -[2024-07-05 13:50:24,314][03445] Num frames 13000... -[2024-07-05 13:50:24,372][03445] Num frames 13100... -[2024-07-05 13:50:24,429][03445] Num frames 13200... -[2024-07-05 13:50:24,489][03445] Num frames 13300... -[2024-07-05 13:50:24,553][03445] Num frames 13400... -[2024-07-05 13:50:24,612][03445] Num frames 13500... -[2024-07-05 13:50:24,670][03445] Num frames 13600... -[2024-07-05 13:50:24,729][03445] Num frames 13700... -[2024-07-05 13:50:24,789][03445] Num frames 13800... -[2024-07-05 13:50:24,850][03445] Num frames 13900... -[2024-07-05 13:50:24,908][03445] Num frames 14000... -[2024-07-05 13:50:24,969][03445] Num frames 14100... -[2024-07-05 13:50:25,028][03445] Num frames 14200... -[2024-07-05 13:50:25,086][03445] Num frames 14300... -[2024-07-05 13:50:25,147][03445] Num frames 14400... -[2024-07-05 13:50:25,207][03445] Num frames 14500... -[2024-07-05 13:50:25,269][03445] Num frames 14600... -[2024-07-05 13:50:25,334][03445] Avg episode rewards: #0: 60.594, true rewards: #0: 20.881 -[2024-07-05 13:50:25,336][03445] Avg episode reward: 60.594, avg true_objective: 20.881 -[2024-07-05 13:50:25,388][03445] Num frames 14700... -[2024-07-05 13:50:25,448][03445] Num frames 14800... -[2024-07-05 13:50:25,504][03445] Num frames 14900... -[2024-07-05 13:50:25,565][03445] Num frames 15000... -[2024-07-05 13:50:25,623][03445] Num frames 15100... -[2024-07-05 13:50:25,684][03445] Num frames 15200... -[2024-07-05 13:50:25,748][03445] Num frames 15300... -[2024-07-05 13:50:25,810][03445] Num frames 15400... -[2024-07-05 13:50:25,876][03445] Num frames 15500... -[2024-07-05 13:50:25,942][03445] Num frames 15600... -[2024-07-05 13:50:26,005][03445] Num frames 15700... -[2024-07-05 13:50:26,069][03445] Num frames 15800... -[2024-07-05 13:50:26,131][03445] Num frames 15900... -[2024-07-05 13:50:26,192][03445] Num frames 16000... -[2024-07-05 13:50:26,257][03445] Num frames 16100... -[2024-07-05 13:50:26,320][03445] Num frames 16200... -[2024-07-05 13:50:26,381][03445] Num frames 16300... -[2024-07-05 13:50:26,443][03445] Num frames 16400... -[2024-07-05 13:50:26,505][03445] Num frames 16500... -[2024-07-05 13:50:26,572][03445] Num frames 16600... -[2024-07-05 13:50:26,636][03445] Num frames 16700... -[2024-07-05 13:50:26,704][03445] Avg episode rewards: #0: 60.645, true rewards: #0: 20.896 -[2024-07-05 13:50:26,706][03445] Avg episode reward: 60.645, avg true_objective: 20.896 -[2024-07-05 13:50:26,769][03445] Num frames 16800... -[2024-07-05 13:50:26,832][03445] Num frames 16900... -[2024-07-05 13:50:26,894][03445] Num frames 17000... -[2024-07-05 13:50:26,957][03445] Num frames 17100... -[2024-07-05 13:50:27,017][03445] Num frames 17200... -[2024-07-05 13:50:27,077][03445] Num frames 17300... -[2024-07-05 13:50:27,138][03445] Num frames 17400... -[2024-07-05 13:50:27,201][03445] Num frames 17500... -[2024-07-05 13:50:27,261][03445] Num frames 17600... -[2024-07-05 13:50:27,323][03445] Num frames 17700... -[2024-07-05 13:50:27,385][03445] Num frames 17800... -[2024-07-05 13:50:27,450][03445] Num frames 17900... -[2024-07-05 13:50:27,514][03445] Num frames 18000... -[2024-07-05 13:50:27,580][03445] Num frames 18100... -[2024-07-05 13:50:27,646][03445] Num frames 18200... -[2024-07-05 13:50:27,713][03445] Num frames 18300... -[2024-07-05 13:50:27,777][03445] Num frames 18400... -[2024-07-05 13:50:27,837][03445] Num frames 18500... -[2024-07-05 13:50:27,900][03445] Num frames 18600... -[2024-07-05 13:50:27,966][03445] Num frames 18700... -[2024-07-05 13:50:28,029][03445] Num frames 18800... -[2024-07-05 13:50:28,095][03445] Avg episode rewards: #0: 60.573, true rewards: #0: 20.908 -[2024-07-05 13:50:28,096][03445] Avg episode reward: 60.573, avg true_objective: 20.908 -[2024-07-05 13:50:28,148][03445] Num frames 18900... -[2024-07-05 13:50:28,209][03445] Num frames 19000... -[2024-07-05 13:50:28,271][03445] Num frames 19100... -[2024-07-05 13:50:28,331][03445] Num frames 19200... -[2024-07-05 13:50:28,392][03445] Num frames 19300... -[2024-07-05 13:50:28,452][03445] Num frames 19400... -[2024-07-05 13:50:28,512][03445] Num frames 19500... -[2024-07-05 13:50:28,574][03445] Num frames 19600... -[2024-07-05 13:50:28,639][03445] Num frames 19700... -[2024-07-05 13:50:28,700][03445] Num frames 19800... -[2024-07-05 13:50:28,762][03445] Num frames 19900... -[2024-07-05 13:50:28,823][03445] Num frames 20000... -[2024-07-05 13:50:28,884][03445] Num frames 20100... -[2024-07-05 13:50:28,955][03445] Num frames 20200... -[2024-07-05 13:50:29,017][03445] Num frames 20300... -[2024-07-05 13:50:29,082][03445] Num frames 20400... -[2024-07-05 13:50:29,145][03445] Num frames 20500... -[2024-07-05 13:50:29,207][03445] Num frames 20600... -[2024-07-05 13:50:29,271][03445] Num frames 20700... -[2024-07-05 13:50:29,333][03445] Num frames 20800... -[2024-07-05 13:50:29,395][03445] Num frames 20900... -[2024-07-05 13:50:29,459][03445] Avg episode rewards: #0: 60.816, true rewards: #0: 20.917 -[2024-07-05 13:50:29,460][03445] Avg episode reward: 60.816, avg true_objective: 20.917 -[2024-07-05 13:50:50,767][03445] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 13:53:48,171][03359] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 13:53:48,172][03359] Rollout worker 0 uses device cpu -[2024-07-05 13:53:48,173][03359] Rollout worker 1 uses device cpu -[2024-07-05 13:53:48,173][03359] Rollout worker 2 uses device cpu -[2024-07-05 13:53:48,173][03359] Rollout worker 3 uses device cpu -[2024-07-05 13:53:48,174][03359] Rollout worker 4 uses device cpu -[2024-07-05 13:53:48,174][03359] Rollout worker 5 uses device cpu -[2024-07-05 13:53:48,174][03359] Rollout worker 6 uses device cpu -[2024-07-05 13:53:48,174][03359] Rollout worker 7 uses device cpu -[2024-07-05 13:53:48,174][03359] Rollout worker 8 uses device cpu -[2024-07-05 13:53:48,175][03359] Rollout worker 9 uses device cpu -[2024-07-05 13:53:48,175][03359] Rollout worker 10 uses device cpu -[2024-07-05 13:53:48,175][03359] Rollout worker 11 uses device cpu -[2024-07-05 13:53:48,175][03359] Rollout worker 12 uses device cpu -[2024-07-05 13:53:48,176][03359] Rollout worker 13 uses device cpu -[2024-07-05 13:53:48,176][03359] Rollout worker 14 uses device cpu -[2024-07-05 13:53:48,176][03359] Rollout worker 15 uses device cpu -[2024-07-05 13:53:48,274][03359] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:53:48,275][03359] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 13:53:48,348][03359] Starting all processes... -[2024-07-05 13:53:48,348][03359] Starting process learner_proc0 -[2024-07-05 13:53:48,909][03359] Starting all processes... -[2024-07-05 13:53:48,914][03359] Starting process inference_proc0-0 -[2024-07-05 13:53:48,914][03359] Starting process rollout_proc0 -[2024-07-05 13:53:48,915][03359] Starting process rollout_proc1 -[2024-07-05 13:53:48,915][03359] Starting process rollout_proc2 -[2024-07-05 13:53:48,916][03359] Starting process rollout_proc3 -[2024-07-05 13:53:48,916][03359] Starting process rollout_proc4 -[2024-07-05 13:53:48,918][03359] Starting process rollout_proc5 -[2024-07-05 13:53:48,919][03359] Starting process rollout_proc6 -[2024-07-05 13:53:48,919][03359] Starting process rollout_proc7 -[2024-07-05 13:53:48,919][03359] Starting process rollout_proc8 -[2024-07-05 13:53:48,923][03359] Starting process rollout_proc9 -[2024-07-05 13:53:48,924][03359] Starting process rollout_proc10 -[2024-07-05 13:53:48,924][03359] Starting process rollout_proc11 -[2024-07-05 13:53:48,925][03359] Starting process rollout_proc12 -[2024-07-05 13:53:48,926][03359] Starting process rollout_proc13 -[2024-07-05 13:53:48,926][03359] Starting process rollout_proc14 -[2024-07-05 13:53:48,947][03359] Starting process rollout_proc15 -[2024-07-05 13:53:52,777][03936] Worker 2 uses CPU cores [2] -[2024-07-05 13:53:52,783][03942] Worker 9 uses CPU cores [9] -[2024-07-05 13:53:52,787][03939] Worker 6 uses CPU cores [6] -[2024-07-05 13:53:52,805][03940] Worker 7 uses CPU cores [7] -[2024-07-05 13:53:52,932][03944] Worker 11 uses CPU cores [11] -[2024-07-05 13:53:52,978][03943] Worker 10 uses CPU cores [10] -[2024-07-05 13:53:53,002][03912] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:53:53,002][03912] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 13:53:53,015][03962] Worker 14 uses CPU cores [14] -[2024-07-05 13:53:53,047][03938] Worker 5 uses CPU cores [5] -[2024-07-05 13:53:53,051][03941] Worker 8 uses CPU cores [8] -[2024-07-05 13:53:53,058][03912] Num visible devices: 1 -[2024-07-05 13:53:53,063][03951] Worker 12 uses CPU cores [12] -[2024-07-05 13:53:53,127][03912] Setting fixed seed 200 -[2024-07-05 13:53:53,137][03912] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:53:53,137][03912] Initializing actor-critic model on device cuda:0 -[2024-07-05 13:53:53,138][03912] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:53:53,139][03912] RunningMeanStd input shape: (1,) -[2024-07-05 13:53:53,151][03912] ConvEncoder: input_channels=3 -[2024-07-05 13:53:53,223][03934] Worker 1 uses CPU cores [1] -[2024-07-05 13:53:53,255][03963] Worker 15 uses CPU cores [15] -[2024-07-05 13:53:53,256][03912] Conv encoder output size: 512 -[2024-07-05 13:53:53,256][03912] Policy head output size: 512 -[2024-07-05 13:53:53,276][03912] Created Actor Critic model with architecture: -[2024-07-05 13:53:53,276][03912] ActorCriticSharedWeights( - (obs_normalizer): ObservationNormalizer( - (running_mean_std): RunningMeanStdDictInPlace( - (running_mean_std): ModuleDict( - (obs): RunningMeanStdInPlace() - ) - ) - ) - (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) - (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (6): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (7): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (10): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) + (11): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (12): ELU(alpha=1.0) + ) + (mlp_layers): Sequential( + (0): Linear(in_features=4608, out_features=512, bias=True) + (1): ELU(alpha=1.0) ) ) ) @@ -15634,1608 +2787,720 @@ main_loop: 1043.3001 (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-07-05 13:53:53,283][03937] Worker 4 uses CPU cores [4] -[2024-07-05 13:53:53,389][03912] Using optimizer -[2024-07-05 13:53:53,395][03961] Worker 13 uses CPU cores [13] -[2024-07-05 13:53:53,497][03933] Worker 0 uses CPU cores [0] -[2024-07-05 13:53:53,559][03935] Worker 3 uses CPU cores [3] -[2024-07-05 13:53:53,610][03932] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:53:53,610][03932] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 13:53:53,650][03932] Num visible devices: 1 -[2024-07-05 13:53:53,903][03912] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045168_350011392.pth... -[2024-07-05 13:53:53,947][03912] Loading model from checkpoint -[2024-07-05 13:53:53,948][03912] Loaded experiment state at self.train_step=45168, self.env_steps=350011392 -[2024-07-05 13:53:53,948][03912] Initialized policy 0 weights for model version 45168 -[2024-07-05 13:53:53,950][03912] LearnerWorker_p0 finished initialization! -[2024-07-05 13:53:53,950][03912] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 13:53:54,011][03932] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 13:53:54,012][03932] RunningMeanStd input shape: (1,) -[2024-07-05 13:53:54,019][03932] ConvEncoder: input_channels=3 -[2024-07-05 13:53:54,071][03932] Conv encoder output size: 512 -[2024-07-05 13:53:54,071][03932] Policy head output size: 512 -[2024-07-05 13:53:54,103][03359] Inference worker 0-0 is ready! -[2024-07-05 13:53:54,103][03359] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 13:53:54,149][03938] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,151][03942] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,151][03935] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,152][03944] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,155][03936] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,155][03951] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,156][03939] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,156][03961] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,156][03940] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,159][03934] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,164][03941] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,169][03963] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,169][03933] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,173][03962] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,173][03943] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,187][03937] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 13:53:54,875][03951] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03940] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03944] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03938] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03942] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03941] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03935] Decorrelating experience for 0 frames... -[2024-07-05 13:53:54,875][03936] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,028][03951] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,028][03942] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,029][03944] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,029][03937] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,029][03936] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,060][03933] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,081][03940] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,107][03939] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,181][03935] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,181][03937] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,192][03944] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,192][03951] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,215][03933] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,283][03941] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,297][03942] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,311][03963] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,347][03936] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,364][03943] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,364][03951] Decorrelating experience for 96 frames... -[2024-07-05 13:53:55,415][03938] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,458][03940] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,484][03937] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,521][03943] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,526][03942] Decorrelating experience for 96 frames... -[2024-07-05 13:53:55,549][03935] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,557][03962] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,563][03941] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,608][03951] Decorrelating experience for 128 frames... -[2024-07-05 13:53:55,642][03939] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,644][03963] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,690][03943] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,724][03934] Decorrelating experience for 0 frames... -[2024-07-05 13:53:55,760][03937] Decorrelating experience for 96 frames... -[2024-07-05 13:53:55,763][03940] Decorrelating experience for 96 frames... -[2024-07-05 13:53:55,776][03935] Decorrelating experience for 96 frames... -[2024-07-05 13:53:55,792][03938] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,813][03963] Decorrelating experience for 64 frames... -[2024-07-05 13:53:55,875][03943] Decorrelating experience for 96 frames... -[2024-07-05 13:53:55,937][03934] Decorrelating experience for 32 frames... -[2024-07-05 13:53:55,938][03942] Decorrelating experience for 128 frames... -[2024-07-05 13:53:55,983][03936] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,014][03940] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,025][03951] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,034][03961] Decorrelating experience for 0 frames... -[2024-07-05 13:53:56,115][03935] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,139][03963] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,155][03941] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,187][03933] Decorrelating experience for 64 frames... -[2024-07-05 13:53:56,203][03942] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,214][03943] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,302][03961] Decorrelating experience for 32 frames... -[2024-07-05 13:53:56,316][03936] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,325][03939] Decorrelating experience for 64 frames... -[2024-07-05 13:53:56,372][03962] Decorrelating experience for 32 frames... -[2024-07-05 13:53:56,382][03935] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,387][03944] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,387][03937] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,474][03940] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,537][03938] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,553][03941] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,555][03934] Decorrelating experience for 64 frames... -[2024-07-05 13:53:56,595][03935] Decorrelating experience for 192 frames... -[2024-07-05 13:53:56,653][03937] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,687][03942] Decorrelating experience for 192 frames... -[2024-07-05 13:53:56,719][03933] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,726][03940] Decorrelating experience for 192 frames... -[2024-07-05 13:53:56,753][03936] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,767][03939] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,809][03941] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,352][03937] Decorrelating experience for 192 frames... -[2024-07-05 13:53:56,363][03961] Decorrelating experience for 64 frames... -[2024-07-05 13:53:56,388][03943] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,428][03963] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,477][03934] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,501][03944] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,516][03933] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,522][03935] Decorrelating experience for 224 frames... -[2024-07-05 13:53:56,552][03938] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,650][03940] Decorrelating experience for 224 frames... -[2024-07-05 13:53:56,689][03961] Decorrelating experience for 96 frames... -[2024-07-05 13:53:56,715][03939] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,715][03943] Decorrelating experience for 192 frames... -[2024-07-05 13:53:56,740][03934] Decorrelating experience for 128 frames... -[2024-07-05 13:53:56,751][03938] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,772][03944] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,851][03942] Decorrelating experience for 224 frames... -[2024-07-05 13:53:56,878][03963] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,907][03937] Decorrelating experience for 224 frames... -[2024-07-05 13:53:56,922][03941] Decorrelating experience for 192 frames... -[2024-07-05 13:53:56,930][03939] Decorrelating experience for 160 frames... -[2024-07-05 13:53:56,930][03933] Decorrelating experience for 160 frames... -[2024-07-05 13:53:57,001][03943] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,079][03951] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,098][03938] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,108][03944] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,145][03939] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,145][03961] Decorrelating experience for 128 frames... -[2024-07-05 13:53:57,251][03934] Decorrelating experience for 160 frames... -[2024-07-05 13:53:57,293][03962] Decorrelating experience for 64 frames... -[2024-07-05 13:53:57,308][03941] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,312][03936] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,339][03944] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,356][03961] Decorrelating experience for 160 frames... -[2024-07-05 13:53:57,376][03939] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,475][03934] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,494][03938] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,511][03951] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,573][03936] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,683][03962] Decorrelating experience for 96 frames... -[2024-07-05 13:53:57,716][03961] Decorrelating experience for 192 frames... -[2024-07-05 13:53:57,746][03934] Decorrelating experience for 224 frames... -[2024-07-05 13:53:57,979][03963] Decorrelating experience for 192 frames... -[2024-07-05 13:53:58,028][03961] Decorrelating experience for 224 frames... -[2024-07-05 13:53:58,037][03962] Decorrelating experience for 128 frames... -[2024-07-05 13:53:58,115][03912] Signal inference workers to stop experience collection... -[2024-07-05 13:53:58,165][03932] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 13:53:58,191][03359] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 350011392. Throughput: 0: nan. Samples: 64. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 13:53:58,192][03359] Avg episode reward: [(0, '1.649')] -[2024-07-05 13:53:58,226][03933] Decorrelating experience for 192 frames... -[2024-07-05 13:53:58,290][03963] Decorrelating experience for 224 frames... -[2024-07-05 13:53:58,315][03962] Decorrelating experience for 160 frames... -[2024-07-05 13:53:58,440][03933] Decorrelating experience for 224 frames... -[2024-07-05 13:53:58,533][03962] Decorrelating experience for 192 frames... -[2024-07-05 13:53:58,737][03962] Decorrelating experience for 224 frames... -[2024-07-05 13:53:59,624][03912] Signal inference workers to resume experience collection... -[2024-07-05 13:53:59,624][03932] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 13:54:01,353][03932] Updated weights for policy 0, policy_version 45178 (0.0132) -[2024-07-05 13:54:03,148][03932] Updated weights for policy 0, policy_version 45188 (0.0008) -[2024-07-05 13:54:03,191][03359] Fps is (10 sec: 32768.3, 60 sec: 32768.3, 300 sec: 32768.3). Total num frames: 350175232. Throughput: 0: 8733.7. Samples: 43732. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:54:03,192][03359] Avg episode reward: [(0, '12.234')] -[2024-07-05 13:54:04,913][03932] Updated weights for policy 0, policy_version 45198 (0.0008) -[2024-07-05 13:54:06,671][03932] Updated weights for policy 0, policy_version 45208 (0.0007) -[2024-07-05 13:54:08,191][03359] Fps is (10 sec: 39322.0, 60 sec: 39322.0, 300 sec: 39322.0). Total num frames: 350404608. Throughput: 0: 7793.7. Samples: 78000. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:54:08,192][03359] Avg episode reward: [(0, '54.167')] -[2024-07-05 13:54:08,268][03359] Heartbeat connected on Batcher_0 -[2024-07-05 13:54:08,271][03359] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 13:54:08,279][03359] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 13:54:08,279][03359] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 13:54:08,283][03359] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 13:54:08,286][03359] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 13:54:08,296][03359] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 13:54:08,297][03359] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 13:54:08,298][03359] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 13:54:08,302][03359] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 13:54:08,303][03359] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 13:54:08,308][03359] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 13:54:08,338][03359] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 13:54:08,339][03359] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 13:54:08,339][03359] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 13:54:08,342][03359] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 13:54:08,344][03359] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 13:54:08,347][03359] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 13:54:08,356][03932] Updated weights for policy 0, policy_version 45218 (0.0008) -[2024-07-05 13:54:08,360][03359] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 13:54:10,051][03932] Updated weights for policy 0, policy_version 45228 (0.0011) -[2024-07-05 13:54:11,742][03932] Updated weights for policy 0, policy_version 45238 (0.0008) -[2024-07-05 13:54:13,191][03359] Fps is (10 sec: 47514.4, 60 sec: 42599.0, 300 sec: 42599.0). Total num frames: 350650368. Throughput: 0: 10026.8. Samples: 150464. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:54:13,192][03359] Avg episode reward: [(0, '55.862')] -[2024-07-05 13:54:13,419][03932] Updated weights for policy 0, policy_version 45248 (0.0011) -[2024-07-05 13:54:15,067][03932] Updated weights for policy 0, policy_version 45258 (0.0010) -[2024-07-05 13:54:16,716][03932] Updated weights for policy 0, policy_version 45268 (0.0008) -[2024-07-05 13:54:18,191][03359] Fps is (10 sec: 49152.3, 60 sec: 44237.2, 300 sec: 44237.2). Total num frames: 350896128. Throughput: 0: 11174.3. Samples: 223548. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:54:18,192][03359] Avg episode reward: [(0, '55.576')] -[2024-07-05 13:54:18,428][03932] Updated weights for policy 0, policy_version 45278 (0.0010) -[2024-07-05 13:54:20,151][03932] Updated weights for policy 0, policy_version 45288 (0.0010) -[2024-07-05 13:54:21,856][03932] Updated weights for policy 0, policy_version 45298 (0.0009) -[2024-07-05 13:54:23,191][03359] Fps is (10 sec: 48332.4, 60 sec: 44892.4, 300 sec: 44892.4). Total num frames: 351133696. Throughput: 0: 10398.8. Samples: 260032. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 13:54:23,192][03359] Avg episode reward: [(0, '55.669')] -[2024-07-05 13:54:23,512][03932] Updated weights for policy 0, policy_version 45308 (0.0010) -[2024-07-05 13:54:25,256][03932] Updated weights for policy 0, policy_version 45318 (0.0011) -[2024-07-05 13:54:26,915][03932] Updated weights for policy 0, policy_version 45328 (0.0008) -[2024-07-05 13:54:28,191][03359] Fps is (10 sec: 48332.0, 60 sec: 45602.1, 300 sec: 45602.1). Total num frames: 351379456. Throughput: 0: 11081.2. Samples: 332500. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 13:54:28,192][03359] Avg episode reward: [(0, '55.118')] -[2024-07-05 13:54:29,133][03932] Updated weights for policy 0, policy_version 45338 (0.0009) -[2024-07-05 13:54:30,846][03932] Updated weights for policy 0, policy_version 45348 (0.0010) -[2024-07-05 13:54:32,578][03932] Updated weights for policy 0, policy_version 45358 (0.0008) -[2024-07-05 13:54:33,191][03359] Fps is (10 sec: 45874.5, 60 sec: 45173.0, 300 sec: 45173.0). Total num frames: 351592448. Throughput: 0: 11375.5. Samples: 398208. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 13:54:33,193][03359] Avg episode reward: [(0, '53.648')] -[2024-07-05 13:54:34,374][03932] Updated weights for policy 0, policy_version 45368 (0.0009) -[2024-07-05 13:54:36,103][03932] Updated weights for policy 0, policy_version 45378 (0.0008) -[2024-07-05 13:54:37,848][03932] Updated weights for policy 0, policy_version 45388 (0.0008) -[2024-07-05 13:54:38,191][03359] Fps is (10 sec: 45056.9, 60 sec: 45465.8, 300 sec: 45465.8). Total num frames: 351830016. Throughput: 0: 10816.9. Samples: 432736. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 13:54:38,192][03359] Avg episode reward: [(0, '53.950')] -[2024-07-05 13:54:39,527][03932] Updated weights for policy 0, policy_version 45398 (0.0008) -[2024-07-05 13:54:41,217][03932] Updated weights for policy 0, policy_version 45408 (0.0008) -[2024-07-05 13:54:42,852][03932] Updated weights for policy 0, policy_version 45418 (0.0011) -[2024-07-05 13:54:43,191][03359] Fps is (10 sec: 48333.8, 60 sec: 45875.4, 300 sec: 45875.4). Total num frames: 352075776. Throughput: 0: 11233.7. Samples: 505576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 13:54:43,192][03359] Avg episode reward: [(0, '55.561')] -[2024-07-05 13:54:44,543][03932] Updated weights for policy 0, policy_version 45428 (0.0008) -[2024-07-05 13:54:46,181][03932] Updated weights for policy 0, policy_version 45438 (0.0007) -[2024-07-05 13:54:47,834][03932] Updated weights for policy 0, policy_version 45448 (0.0010) -[2024-07-05 13:54:48,191][03359] Fps is (10 sec: 49151.9, 60 sec: 46203.0, 300 sec: 46203.0). Total num frames: 352321536. Throughput: 0: 11899.6. Samples: 579212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 13:54:48,192][03359] Avg episode reward: [(0, '54.385')] -[2024-07-05 13:54:49,541][03932] Updated weights for policy 0, policy_version 45458 (0.0008) -[2024-07-05 13:54:51,295][03932] Updated weights for policy 0, policy_version 45468 (0.0008) -[2024-07-05 13:54:53,068][03932] Updated weights for policy 0, policy_version 45478 (0.0011) -[2024-07-05 13:54:53,191][03359] Fps is (10 sec: 47513.3, 60 sec: 46173.2, 300 sec: 46173.2). Total num frames: 352550912. Throughput: 0: 11928.4. Samples: 614776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 4.0) -[2024-07-05 13:54:53,192][03359] Avg episode reward: [(0, '54.426')] -[2024-07-05 13:54:54,760][03932] Updated weights for policy 0, policy_version 45488 (0.0008) -[2024-07-05 13:54:56,435][03932] Updated weights for policy 0, policy_version 45498 (0.0013) -[2024-07-05 13:54:58,164][03932] Updated weights for policy 0, policy_version 45508 (0.0010) -[2024-07-05 13:54:58,191][03359] Fps is (10 sec: 47512.8, 60 sec: 46421.3, 300 sec: 46421.3). Total num frames: 352796672. Throughput: 0: 11912.9. Samples: 686548. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:54:58,192][03359] Avg episode reward: [(0, '54.768')] -[2024-07-05 13:54:59,816][03932] Updated weights for policy 0, policy_version 45518 (0.0008) -[2024-07-05 13:55:01,538][03932] Updated weights for policy 0, policy_version 45528 (0.0008) -[2024-07-05 13:55:03,191][03359] Fps is (10 sec: 48332.5, 60 sec: 47650.2, 300 sec: 46505.4). Total num frames: 353034240. Throughput: 0: 11893.6. Samples: 758760. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:03,193][03359] Avg episode reward: [(0, '52.510')] -[2024-07-05 13:55:03,256][03932] Updated weights for policy 0, policy_version 45538 (0.0008) -[2024-07-05 13:55:04,957][03932] Updated weights for policy 0, policy_version 45548 (0.0008) -[2024-07-05 13:55:06,644][03932] Updated weights for policy 0, policy_version 45558 (0.0008) -[2024-07-05 13:55:08,191][03359] Fps is (10 sec: 48333.7, 60 sec: 47923.3, 300 sec: 46694.5). Total num frames: 353280000. Throughput: 0: 11888.3. Samples: 795004. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:08,192][03359] Avg episode reward: [(0, '52.314')] -[2024-07-05 13:55:08,352][03932] Updated weights for policy 0, policy_version 45568 (0.0008) -[2024-07-05 13:55:10,028][03932] Updated weights for policy 0, policy_version 45578 (0.0011) -[2024-07-05 13:55:11,723][03932] Updated weights for policy 0, policy_version 45588 (0.0010) -[2024-07-05 13:55:13,191][03359] Fps is (10 sec: 48333.4, 60 sec: 47786.7, 300 sec: 46749.1). Total num frames: 353517568. Throughput: 0: 11889.4. Samples: 867520. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:13,192][03359] Avg episode reward: [(0, '54.182')] -[2024-07-05 13:55:13,383][03932] Updated weights for policy 0, policy_version 45598 (0.0010) -[2024-07-05 13:55:15,058][03932] Updated weights for policy 0, policy_version 45608 (0.0012) -[2024-07-05 13:55:16,761][03932] Updated weights for policy 0, policy_version 45618 (0.0008) -[2024-07-05 13:55:18,191][03359] Fps is (10 sec: 48333.3, 60 sec: 47786.8, 300 sec: 46899.4). Total num frames: 353763328. Throughput: 0: 12044.3. Samples: 940200. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:18,192][03359] Avg episode reward: [(0, '55.814')] -[2024-07-05 13:55:18,467][03932] Updated weights for policy 0, policy_version 45628 (0.0007) -[2024-07-05 13:55:20,156][03932] Updated weights for policy 0, policy_version 45638 (0.0007) -[2024-07-05 13:55:21,861][03932] Updated weights for policy 0, policy_version 45648 (0.0010) -[2024-07-05 13:55:23,191][03359] Fps is (10 sec: 48332.9, 60 sec: 47786.7, 300 sec: 46935.5). Total num frames: 354000896. Throughput: 0: 12088.8. Samples: 976732. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:23,192][03359] Avg episode reward: [(0, '55.124')] -[2024-07-05 13:55:23,541][03932] Updated weights for policy 0, policy_version 45658 (0.0008) -[2024-07-05 13:55:25,256][03932] Updated weights for policy 0, policy_version 45668 (0.0008) -[2024-07-05 13:55:26,909][03932] Updated weights for policy 0, policy_version 45678 (0.0007) -[2024-07-05 13:55:28,191][03359] Fps is (10 sec: 48332.3, 60 sec: 47786.8, 300 sec: 47058.6). Total num frames: 354246656. Throughput: 0: 12080.8. Samples: 1049212. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:28,192][03359] Avg episode reward: [(0, '53.987')] -[2024-07-05 13:55:28,629][03932] Updated weights for policy 0, policy_version 45688 (0.0010) -[2024-07-05 13:55:30,322][03932] Updated weights for policy 0, policy_version 45698 (0.0008) -[2024-07-05 13:55:32,024][03932] Updated weights for policy 0, policy_version 45708 (0.0008) -[2024-07-05 13:55:33,191][03359] Fps is (10 sec: 49152.0, 60 sec: 48333.0, 300 sec: 47168.8). Total num frames: 354492416. Throughput: 0: 12062.5. Samples: 1122024. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:33,192][03359] Avg episode reward: [(0, '53.181')] -[2024-07-05 13:55:33,706][03932] Updated weights for policy 0, policy_version 45718 (0.0008) -[2024-07-05 13:55:35,392][03932] Updated weights for policy 0, policy_version 45728 (0.0008) -[2024-07-05 13:55:37,090][03932] Updated weights for policy 0, policy_version 45738 (0.0007) -[2024-07-05 13:55:38,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48332.7, 300 sec: 47186.0). Total num frames: 354729984. Throughput: 0: 12079.9. Samples: 1158372. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:38,192][03359] Avg episode reward: [(0, '54.812')] -[2024-07-05 13:55:38,795][03932] Updated weights for policy 0, policy_version 45748 (0.0008) -[2024-07-05 13:55:40,489][03932] Updated weights for policy 0, policy_version 45758 (0.0008) -[2024-07-05 13:55:42,113][03932] Updated weights for policy 0, policy_version 45768 (0.0008) -[2024-07-05 13:55:43,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 47279.6). Total num frames: 354975744. Throughput: 0: 12104.1. Samples: 1231228. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:43,192][03359] Avg episode reward: [(0, '56.086')] -[2024-07-05 13:55:43,196][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045774_354975744.pth... -[2024-07-05 13:55:43,269][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000044667_345907200.pth -[2024-07-05 13:55:43,787][03932] Updated weights for policy 0, policy_version 45778 (0.0008) -[2024-07-05 13:55:45,507][03932] Updated weights for policy 0, policy_version 45788 (0.0008) -[2024-07-05 13:55:47,227][03932] Updated weights for policy 0, policy_version 45798 (0.0008) -[2024-07-05 13:55:48,191][03359] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 47290.2). Total num frames: 355213312. Throughput: 0: 12101.3. Samples: 1303320. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:48,193][03359] Avg episode reward: [(0, '54.206')] -[2024-07-05 13:55:48,925][03932] Updated weights for policy 0, policy_version 45808 (0.0007) -[2024-07-05 13:55:50,599][03932] Updated weights for policy 0, policy_version 45818 (0.0010) -[2024-07-05 13:55:52,275][03932] Updated weights for policy 0, policy_version 45828 (0.0009) -[2024-07-05 13:55:53,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48469.4, 300 sec: 47371.2). Total num frames: 355459072. Throughput: 0: 12103.0. Samples: 1339640. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:55:53,192][03359] Avg episode reward: [(0, '52.577')] -[2024-07-05 13:55:54,004][03932] Updated weights for policy 0, policy_version 45838 (0.0007) -[2024-07-05 13:55:55,701][03932] Updated weights for policy 0, policy_version 45848 (0.0008) -[2024-07-05 13:55:57,401][03932] Updated weights for policy 0, policy_version 45858 (0.0010) -[2024-07-05 13:55:58,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48332.9, 300 sec: 47377.1). Total num frames: 355696640. Throughput: 0: 12108.0. Samples: 1412380. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:55:58,192][03359] Avg episode reward: [(0, '54.809')] -[2024-07-05 13:55:59,126][03932] Updated weights for policy 0, policy_version 45868 (0.0012) -[2024-07-05 13:56:00,833][03932] Updated weights for policy 0, policy_version 45878 (0.0008) -[2024-07-05 13:56:02,491][03932] Updated weights for policy 0, policy_version 45888 (0.0007) -[2024-07-05 13:56:03,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48469.4, 300 sec: 47448.1). Total num frames: 355942400. Throughput: 0: 12095.2. Samples: 1484484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:56:03,192][03359] Avg episode reward: [(0, '51.490')] -[2024-07-05 13:56:04,165][03932] Updated weights for policy 0, policy_version 45898 (0.0008) -[2024-07-05 13:56:05,924][03932] Updated weights for policy 0, policy_version 45908 (0.0008) -[2024-07-05 13:56:07,616][03932] Updated weights for policy 0, policy_version 45918 (0.0010) -[2024-07-05 13:56:08,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48332.7, 300 sec: 47450.6). Total num frames: 356179968. Throughput: 0: 12083.6. Samples: 1520496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:56:08,192][03359] Avg episode reward: [(0, '53.751')] -[2024-07-05 13:56:09,308][03932] Updated weights for policy 0, policy_version 45928 (0.0008) -[2024-07-05 13:56:10,992][03932] Updated weights for policy 0, policy_version 45938 (0.0008) -[2024-07-05 13:56:12,669][03932] Updated weights for policy 0, policy_version 45948 (0.0008) -[2024-07-05 13:56:13,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 47513.7). Total num frames: 356425728. Throughput: 0: 12097.6. Samples: 1593604. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:56:13,192][03359] Avg episode reward: [(0, '55.732')] -[2024-07-05 13:56:14,358][03932] Updated weights for policy 0, policy_version 45958 (0.0008) -[2024-07-05 13:56:16,052][03932] Updated weights for policy 0, policy_version 45968 (0.0008) -[2024-07-05 13:56:17,755][03932] Updated weights for policy 0, policy_version 45978 (0.0007) -[2024-07-05 13:56:18,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48332.7, 300 sec: 47513.7). Total num frames: 356663296. Throughput: 0: 12089.8. Samples: 1666068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:56:18,192][03359] Avg episode reward: [(0, '53.614')] -[2024-07-05 13:56:19,418][03932] Updated weights for policy 0, policy_version 45988 (0.0008) -[2024-07-05 13:56:21,072][03932] Updated weights for policy 0, policy_version 45998 (0.0008) -[2024-07-05 13:56:22,798][03932] Updated weights for policy 0, policy_version 46008 (0.0008) -[2024-07-05 13:56:23,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 47570.2). Total num frames: 356909056. Throughput: 0: 12086.3. Samples: 1702256. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:56:23,192][03359] Avg episode reward: [(0, '53.981')] -[2024-07-05 13:56:24,477][03932] Updated weights for policy 0, policy_version 46018 (0.0008) -[2024-07-05 13:56:26,194][03932] Updated weights for policy 0, policy_version 46028 (0.0008) -[2024-07-05 13:56:27,855][03932] Updated weights for policy 0, policy_version 46038 (0.0008) -[2024-07-05 13:56:28,191][03359] Fps is (10 sec: 49151.3, 60 sec: 48469.2, 300 sec: 47622.8). Total num frames: 357154816. Throughput: 0: 12087.5. Samples: 1775168. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:56:28,192][03359] Avg episode reward: [(0, '52.662')] -[2024-07-05 13:56:29,557][03932] Updated weights for policy 0, policy_version 46048 (0.0007) -[2024-07-05 13:56:31,243][03932] Updated weights for policy 0, policy_version 46058 (0.0008) -[2024-07-05 13:56:32,947][03932] Updated weights for policy 0, policy_version 46068 (0.0011) -[2024-07-05 13:56:33,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 47619.4). Total num frames: 357392384. Throughput: 0: 12097.9. Samples: 1847724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:56:33,193][03359] Avg episode reward: [(0, '51.068')] -[2024-07-05 13:56:34,662][03932] Updated weights for policy 0, policy_version 46078 (0.0008) -[2024-07-05 13:56:36,328][03932] Updated weights for policy 0, policy_version 46088 (0.0007) -[2024-07-05 13:56:37,975][03932] Updated weights for policy 0, policy_version 46098 (0.0008) -[2024-07-05 13:56:38,191][03359] Fps is (10 sec: 47514.7, 60 sec: 48332.9, 300 sec: 47616.1). Total num frames: 357629952. Throughput: 0: 12101.7. Samples: 1884216. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:56:38,192][03359] Avg episode reward: [(0, '54.791')] -[2024-07-05 13:56:39,675][03932] Updated weights for policy 0, policy_version 46108 (0.0008) -[2024-07-05 13:56:41,383][03932] Updated weights for policy 0, policy_version 46118 (0.0008) -[2024-07-05 13:56:43,099][03932] Updated weights for policy 0, policy_version 46128 (0.0010) -[2024-07-05 13:56:43,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 47662.6). Total num frames: 357875712. Throughput: 0: 12099.0. Samples: 1956832. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:56:43,192][03359] Avg episode reward: [(0, '52.133')] -[2024-07-05 13:56:44,790][03932] Updated weights for policy 0, policy_version 46138 (0.0008) -[2024-07-05 13:56:46,482][03932] Updated weights for policy 0, policy_version 46148 (0.0008) -[2024-07-05 13:56:48,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48333.0, 300 sec: 47658.2). Total num frames: 358113280. Throughput: 0: 12108.4. Samples: 2029360. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:56:48,192][03359] Avg episode reward: [(0, '53.814')] -[2024-07-05 13:56:48,202][03932] Updated weights for policy 0, policy_version 46158 (0.0007) -[2024-07-05 13:56:49,919][03932] Updated weights for policy 0, policy_version 46168 (0.0008) -[2024-07-05 13:56:51,578][03932] Updated weights for policy 0, policy_version 46178 (0.0008) -[2024-07-05 13:56:53,191][03359] Fps is (10 sec: 48331.9, 60 sec: 48332.7, 300 sec: 47700.9). Total num frames: 358359040. Throughput: 0: 12114.1. Samples: 2065628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:56:53,192][03359] Avg episode reward: [(0, '54.799')] -[2024-07-05 13:56:53,265][03932] Updated weights for policy 0, policy_version 46188 (0.0008) -[2024-07-05 13:56:54,998][03932] Updated weights for policy 0, policy_version 46198 (0.0008) -[2024-07-05 13:56:56,681][03932] Updated weights for policy 0, policy_version 46208 (0.0008) -[2024-07-05 13:56:58,191][03359] Fps is (10 sec: 49151.6, 60 sec: 48469.4, 300 sec: 47741.2). Total num frames: 358604800. Throughput: 0: 12100.1. Samples: 2138108. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:56:58,192][03359] Avg episode reward: [(0, '55.529')] -[2024-07-05 13:56:58,368][03932] Updated weights for policy 0, policy_version 46218 (0.0008) -[2024-07-05 13:57:00,040][03932] Updated weights for policy 0, policy_version 46228 (0.0011) -[2024-07-05 13:57:01,727][03932] Updated weights for policy 0, policy_version 46238 (0.0008) -[2024-07-05 13:57:03,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 47735.1). Total num frames: 358842368. Throughput: 0: 12104.0. Samples: 2210748. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:57:03,192][03359] Avg episode reward: [(0, '54.791')] -[2024-07-05 13:57:03,419][03932] Updated weights for policy 0, policy_version 46248 (0.0007) -[2024-07-05 13:57:05,082][03932] Updated weights for policy 0, policy_version 46258 (0.0008) -[2024-07-05 13:57:06,765][03932] Updated weights for policy 0, policy_version 46268 (0.0008) -[2024-07-05 13:57:08,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48469.5, 300 sec: 47772.4). Total num frames: 359088128. Throughput: 0: 12110.2. Samples: 2247216. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:57:08,192][03359] Avg episode reward: [(0, '53.233')] -[2024-07-05 13:57:08,504][03932] Updated weights for policy 0, policy_version 46278 (0.0009) -[2024-07-05 13:57:10,226][03932] Updated weights for policy 0, policy_version 46288 (0.0008) -[2024-07-05 13:57:11,907][03932] Updated weights for policy 0, policy_version 46298 (0.0010) -[2024-07-05 13:57:13,191][03359] Fps is (10 sec: 48333.1, 60 sec: 48332.9, 300 sec: 47765.7). Total num frames: 359325696. Throughput: 0: 12096.4. Samples: 2319504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:57:13,192][03359] Avg episode reward: [(0, '52.189')] -[2024-07-05 13:57:13,608][03932] Updated weights for policy 0, policy_version 46308 (0.0008) -[2024-07-05 13:57:15,333][03932] Updated weights for policy 0, policy_version 46318 (0.0008) -[2024-07-05 13:57:16,986][03932] Updated weights for policy 0, policy_version 46328 (0.0008) -[2024-07-05 13:57:18,191][03359] Fps is (10 sec: 48332.2, 60 sec: 48469.3, 300 sec: 47800.3). Total num frames: 359571456. Throughput: 0: 12102.7. Samples: 2392348. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:57:18,192][03359] Avg episode reward: [(0, '52.986')] -[2024-07-05 13:57:18,635][03932] Updated weights for policy 0, policy_version 46338 (0.0011) -[2024-07-05 13:57:20,329][03932] Updated weights for policy 0, policy_version 46348 (0.0009) -[2024-07-05 13:57:22,041][03932] Updated weights for policy 0, policy_version 46358 (0.0010) -[2024-07-05 13:57:23,191][03359] Fps is (10 sec: 48331.2, 60 sec: 48332.6, 300 sec: 47793.3). Total num frames: 359809024. Throughput: 0: 12089.8. Samples: 2428260. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:57:23,193][03359] Avg episode reward: [(0, '50.896')] -[2024-07-05 13:57:23,757][03932] Updated weights for policy 0, policy_version 46368 (0.0010) -[2024-07-05 13:57:25,439][03932] Updated weights for policy 0, policy_version 46378 (0.0008) -[2024-07-05 13:57:27,135][03932] Updated weights for policy 0, policy_version 46388 (0.0008) -[2024-07-05 13:57:28,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48332.9, 300 sec: 47825.7). Total num frames: 360054784. Throughput: 0: 12094.1. Samples: 2501068. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:57:28,193][03359] Avg episode reward: [(0, '55.288')] -[2024-07-05 13:57:28,833][03932] Updated weights for policy 0, policy_version 46398 (0.0010) -[2024-07-05 13:57:30,519][03932] Updated weights for policy 0, policy_version 46408 (0.0008) -[2024-07-05 13:57:32,206][03932] Updated weights for policy 0, policy_version 46418 (0.0008) -[2024-07-05 13:57:33,191][03359] Fps is (10 sec: 49153.4, 60 sec: 48469.3, 300 sec: 47856.6). Total num frames: 360300544. Throughput: 0: 12097.7. Samples: 2573756. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:57:33,192][03359] Avg episode reward: [(0, '54.386')] -[2024-07-05 13:57:33,861][03932] Updated weights for policy 0, policy_version 46428 (0.0008) -[2024-07-05 13:57:35,579][03932] Updated weights for policy 0, policy_version 46438 (0.0008) -[2024-07-05 13:57:37,289][03932] Updated weights for policy 0, policy_version 46448 (0.0009) -[2024-07-05 13:57:38,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 47848.8). Total num frames: 360538112. Throughput: 0: 12090.5. Samples: 2609700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:57:38,192][03359] Avg episode reward: [(0, '52.930')] -[2024-07-05 13:57:38,973][03932] Updated weights for policy 0, policy_version 46458 (0.0010) -[2024-07-05 13:57:40,640][03932] Updated weights for policy 0, policy_version 46468 (0.0008) -[2024-07-05 13:57:42,375][03932] Updated weights for policy 0, policy_version 46478 (0.0008) -[2024-07-05 13:57:43,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48469.3, 300 sec: 47877.7). Total num frames: 360783872. Throughput: 0: 12099.8. Samples: 2682600. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:57:43,192][03359] Avg episode reward: [(0, '51.880')] -[2024-07-05 13:57:43,196][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000046483_360783872.pth... -[2024-07-05 13:57:43,263][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045168_350011392.pth -[2024-07-05 13:57:44,058][03932] Updated weights for policy 0, policy_version 46488 (0.0007) -[2024-07-05 13:57:45,756][03932] Updated weights for policy 0, policy_version 46498 (0.0009) -[2024-07-05 13:57:47,434][03932] Updated weights for policy 0, policy_version 46508 (0.0008) -[2024-07-05 13:57:48,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 47869.8). Total num frames: 361021440. Throughput: 0: 12095.0. Samples: 2755024. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:57:48,192][03359] Avg episode reward: [(0, '53.484')] -[2024-07-05 13:57:49,148][03932] Updated weights for policy 0, policy_version 46518 (0.0010) -[2024-07-05 13:57:50,853][03932] Updated weights for policy 0, policy_version 46528 (0.0008) -[2024-07-05 13:57:52,548][03932] Updated weights for policy 0, policy_version 46538 (0.0008) -[2024-07-05 13:57:53,191][03359] Fps is (10 sec: 47513.5, 60 sec: 48332.9, 300 sec: 47862.2). Total num frames: 361259008. Throughput: 0: 12093.6. Samples: 2791428. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:57:53,192][03359] Avg episode reward: [(0, '54.817')] -[2024-07-05 13:57:54,224][03932] Updated weights for policy 0, policy_version 46548 (0.0008) -[2024-07-05 13:57:55,921][03932] Updated weights for policy 0, policy_version 46558 (0.0009) -[2024-07-05 13:57:57,605][03932] Updated weights for policy 0, policy_version 46568 (0.0011) -[2024-07-05 13:57:58,191][03359] Fps is (10 sec: 48332.1, 60 sec: 48332.7, 300 sec: 47889.1). Total num frames: 361504768. Throughput: 0: 12094.7. Samples: 2863768. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:57:58,192][03359] Avg episode reward: [(0, '53.484')] -[2024-07-05 13:57:59,332][03932] Updated weights for policy 0, policy_version 46578 (0.0008) -[2024-07-05 13:58:01,094][03932] Updated weights for policy 0, policy_version 46588 (0.0015) -[2024-07-05 13:58:02,765][03932] Updated weights for policy 0, policy_version 46598 (0.0008) -[2024-07-05 13:58:03,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 47881.4). Total num frames: 361742336. Throughput: 0: 12064.7. Samples: 2935260. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:58:03,192][03359] Avg episode reward: [(0, '50.327')] -[2024-07-05 13:58:04,490][03932] Updated weights for policy 0, policy_version 46608 (0.0012) -[2024-07-05 13:58:06,243][03932] Updated weights for policy 0, policy_version 46618 (0.0014) -[2024-07-05 13:58:08,061][03932] Updated weights for policy 0, policy_version 46628 (0.0008) -[2024-07-05 13:58:08,191][03359] Fps is (10 sec: 47514.1, 60 sec: 48196.3, 300 sec: 47874.1). Total num frames: 361979904. Throughput: 0: 12054.0. Samples: 2970688. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:58:08,192][03359] Avg episode reward: [(0, '50.643')] -[2024-07-05 13:58:09,777][03932] Updated weights for policy 0, policy_version 46638 (0.0007) -[2024-07-05 13:58:11,436][03932] Updated weights for policy 0, policy_version 46648 (0.0010) -[2024-07-05 13:58:13,100][03932] Updated weights for policy 0, policy_version 46658 (0.0008) -[2024-07-05 13:58:13,191][03359] Fps is (10 sec: 47513.5, 60 sec: 48196.2, 300 sec: 47867.0). Total num frames: 362217472. Throughput: 0: 12012.8. Samples: 3041644. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:58:13,192][03359] Avg episode reward: [(0, '52.409')] -[2024-07-05 13:58:14,816][03932] Updated weights for policy 0, policy_version 46668 (0.0008) -[2024-07-05 13:58:16,516][03932] Updated weights for policy 0, policy_version 46678 (0.0008) -[2024-07-05 13:58:18,191][03359] Fps is (10 sec: 47513.8, 60 sec: 48059.9, 300 sec: 47860.2). Total num frames: 362455040. Throughput: 0: 12013.7. Samples: 3114372. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 13:58:18,192][03359] Avg episode reward: [(0, '53.020')] -[2024-07-05 13:58:18,241][03932] Updated weights for policy 0, policy_version 46688 (0.0009) -[2024-07-05 13:58:19,897][03932] Updated weights for policy 0, policy_version 46698 (0.0010) -[2024-07-05 13:58:21,615][03932] Updated weights for policy 0, policy_version 46708 (0.0009) -[2024-07-05 13:58:23,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48196.5, 300 sec: 47884.6). Total num frames: 362700800. Throughput: 0: 12019.4. Samples: 3150572. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:58:23,192][03359] Avg episode reward: [(0, '49.104')] -[2024-07-05 13:58:23,302][03932] Updated weights for policy 0, policy_version 46718 (0.0010) -[2024-07-05 13:58:25,001][03932] Updated weights for policy 0, policy_version 46728 (0.0008) -[2024-07-05 13:58:26,681][03932] Updated weights for policy 0, policy_version 46738 (0.0008) -[2024-07-05 13:58:28,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48059.8, 300 sec: 47877.7). Total num frames: 362938368. Throughput: 0: 12019.5. Samples: 3223476. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:58:28,192][03359] Avg episode reward: [(0, '51.180')] -[2024-07-05 13:58:28,393][03932] Updated weights for policy 0, policy_version 46748 (0.0007) -[2024-07-05 13:58:30,078][03932] Updated weights for policy 0, policy_version 46758 (0.0009) -[2024-07-05 13:58:31,747][03932] Updated weights for policy 0, policy_version 46768 (0.0008) -[2024-07-05 13:58:33,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48059.7, 300 sec: 47900.9). Total num frames: 363184128. Throughput: 0: 12014.9. Samples: 3295696. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:58:33,192][03359] Avg episode reward: [(0, '51.715')] -[2024-07-05 13:58:33,464][03932] Updated weights for policy 0, policy_version 46778 (0.0007) -[2024-07-05 13:58:35,140][03932] Updated weights for policy 0, policy_version 46788 (0.0012) -[2024-07-05 13:58:36,862][03932] Updated weights for policy 0, policy_version 46798 (0.0008) -[2024-07-05 13:58:38,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 47894.0). Total num frames: 363421696. Throughput: 0: 12006.5. Samples: 3331720. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:58:38,192][03359] Avg episode reward: [(0, '53.063')] -[2024-07-05 13:58:38,544][03932] Updated weights for policy 0, policy_version 46808 (0.0011) -[2024-07-05 13:58:40,241][03932] Updated weights for policy 0, policy_version 46818 (0.0008) -[2024-07-05 13:58:41,960][03932] Updated weights for policy 0, policy_version 46828 (0.0008) -[2024-07-05 13:58:43,193][03359] Fps is (10 sec: 48322.2, 60 sec: 48057.9, 300 sec: 47915.7). Total num frames: 363667456. Throughput: 0: 12007.3. Samples: 3404120. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:58:43,194][03359] Avg episode reward: [(0, '51.538')] -[2024-07-05 13:58:43,658][03932] Updated weights for policy 0, policy_version 46838 (0.0008) -[2024-07-05 13:58:45,360][03932] Updated weights for policy 0, policy_version 46848 (0.0011) -[2024-07-05 13:58:46,998][03932] Updated weights for policy 0, policy_version 46858 (0.0008) -[2024-07-05 13:58:48,191][03359] Fps is (10 sec: 48332.1, 60 sec: 48059.6, 300 sec: 47909.1). Total num frames: 363905024. Throughput: 0: 12035.7. Samples: 3476868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:58:48,192][03359] Avg episode reward: [(0, '52.774')] -[2024-07-05 13:58:48,694][03932] Updated weights for policy 0, policy_version 46868 (0.0010) -[2024-07-05 13:58:50,394][03932] Updated weights for policy 0, policy_version 46878 (0.0012) -[2024-07-05 13:58:52,103][03932] Updated weights for policy 0, policy_version 46888 (0.0008) -[2024-07-05 13:58:53,191][03359] Fps is (10 sec: 48343.4, 60 sec: 48196.3, 300 sec: 47930.2). Total num frames: 364150784. Throughput: 0: 12045.4. Samples: 3512732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:58:53,192][03359] Avg episode reward: [(0, '52.777')] -[2024-07-05 13:58:53,802][03932] Updated weights for policy 0, policy_version 46898 (0.0007) -[2024-07-05 13:58:55,481][03932] Updated weights for policy 0, policy_version 46908 (0.0007) -[2024-07-05 13:58:57,140][03932] Updated weights for policy 0, policy_version 46918 (0.0009) -[2024-07-05 13:58:58,191][03359] Fps is (10 sec: 49152.9, 60 sec: 48196.4, 300 sec: 48207.9). Total num frames: 364396544. Throughput: 0: 12091.4. Samples: 3585756. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:58:58,192][03359] Avg episode reward: [(0, '52.925')] -[2024-07-05 13:58:58,866][03932] Updated weights for policy 0, policy_version 46928 (0.0008) -[2024-07-05 13:59:00,604][03932] Updated weights for policy 0, policy_version 46938 (0.0008) -[2024-07-05 13:59:02,304][03932] Updated weights for policy 0, policy_version 46948 (0.0010) -[2024-07-05 13:59:03,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 364634112. Throughput: 0: 12080.6. Samples: 3658000. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:59:03,192][03359] Avg episode reward: [(0, '52.729')] -[2024-07-05 13:59:03,989][03932] Updated weights for policy 0, policy_version 46958 (0.0008) -[2024-07-05 13:59:05,692][03932] Updated weights for policy 0, policy_version 46968 (0.0010) -[2024-07-05 13:59:07,399][03932] Updated weights for policy 0, policy_version 46978 (0.0009) -[2024-07-05 13:59:08,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 364879872. Throughput: 0: 12085.1. Samples: 3694400. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:59:08,192][03359] Avg episode reward: [(0, '51.981')] -[2024-07-05 13:59:09,077][03932] Updated weights for policy 0, policy_version 46988 (0.0009) -[2024-07-05 13:59:10,769][03932] Updated weights for policy 0, policy_version 46998 (0.0010) -[2024-07-05 13:59:12,442][03932] Updated weights for policy 0, policy_version 47008 (0.0008) -[2024-07-05 13:59:13,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48207.8). Total num frames: 365117440. Throughput: 0: 12068.3. Samples: 3766548. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 13:59:13,192][03359] Avg episode reward: [(0, '55.026')] -[2024-07-05 13:59:14,148][03932] Updated weights for policy 0, policy_version 47018 (0.0010) -[2024-07-05 13:59:15,842][03932] Updated weights for policy 0, policy_version 47028 (0.0009) -[2024-07-05 13:59:17,558][03932] Updated weights for policy 0, policy_version 47038 (0.0008) -[2024-07-05 13:59:18,191][03359] Fps is (10 sec: 47512.9, 60 sec: 48332.7, 300 sec: 48207.8). Total num frames: 365355008. Throughput: 0: 12079.2. Samples: 3839260. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:18,193][03359] Avg episode reward: [(0, '54.639')] -[2024-07-05 13:59:19,222][03932] Updated weights for policy 0, policy_version 47048 (0.0008) -[2024-07-05 13:59:20,938][03932] Updated weights for policy 0, policy_version 47058 (0.0013) -[2024-07-05 13:59:22,648][03932] Updated weights for policy 0, policy_version 47068 (0.0008) -[2024-07-05 13:59:23,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48207.9). Total num frames: 365600768. Throughput: 0: 12079.0. Samples: 3875276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:23,192][03359] Avg episode reward: [(0, '51.839')] -[2024-07-05 13:59:24,362][03932] Updated weights for policy 0, policy_version 47078 (0.0010) -[2024-07-05 13:59:26,040][03932] Updated weights for policy 0, policy_version 47088 (0.0010) -[2024-07-05 13:59:27,713][03932] Updated weights for policy 0, policy_version 47098 (0.0008) -[2024-07-05 13:59:28,191][03359] Fps is (10 sec: 48333.4, 60 sec: 48332.8, 300 sec: 48291.2). Total num frames: 365838336. Throughput: 0: 12078.5. Samples: 3947624. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:28,192][03359] Avg episode reward: [(0, '53.157')] -[2024-07-05 13:59:29,403][03932] Updated weights for policy 0, policy_version 47108 (0.0008) -[2024-07-05 13:59:31,133][03932] Updated weights for policy 0, policy_version 47118 (0.0008) -[2024-07-05 13:59:32,804][03932] Updated weights for policy 0, policy_version 47128 (0.0007) -[2024-07-05 13:59:33,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 366084096. Throughput: 0: 12081.7. Samples: 4020544. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:33,192][03359] Avg episode reward: [(0, '53.053')] -[2024-07-05 13:59:34,477][03932] Updated weights for policy 0, policy_version 47138 (0.0008) -[2024-07-05 13:59:36,216][03932] Updated weights for policy 0, policy_version 47148 (0.0011) -[2024-07-05 13:59:37,892][03932] Updated weights for policy 0, policy_version 47158 (0.0010) -[2024-07-05 13:59:38,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 366321664. Throughput: 0: 12086.1. Samples: 4056608. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:38,192][03359] Avg episode reward: [(0, '54.015')] -[2024-07-05 13:59:39,600][03932] Updated weights for policy 0, policy_version 47168 (0.0008) -[2024-07-05 13:59:41,294][03932] Updated weights for policy 0, policy_version 47178 (0.0009) -[2024-07-05 13:59:42,986][03932] Updated weights for policy 0, policy_version 47188 (0.0008) -[2024-07-05 13:59:43,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48334.6, 300 sec: 48291.2). Total num frames: 366567424. Throughput: 0: 12075.7. Samples: 4129164. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:43,192][03359] Avg episode reward: [(0, '53.445')] -[2024-07-05 13:59:43,197][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000047189_366567424.pth... -[2024-07-05 13:59:43,262][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000045774_354975744.pth -[2024-07-05 13:59:44,682][03932] Updated weights for policy 0, policy_version 47198 (0.0010) -[2024-07-05 13:59:46,402][03932] Updated weights for policy 0, policy_version 47208 (0.0008) -[2024-07-05 13:59:48,114][03932] Updated weights for policy 0, policy_version 47218 (0.0008) -[2024-07-05 13:59:48,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 366804992. Throughput: 0: 12067.7. Samples: 4201048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:48,192][03359] Avg episode reward: [(0, '52.790')] -[2024-07-05 13:59:49,808][03932] Updated weights for policy 0, policy_version 47228 (0.0008) -[2024-07-05 13:59:51,518][03932] Updated weights for policy 0, policy_version 47238 (0.0008) -[2024-07-05 13:59:53,163][03932] Updated weights for policy 0, policy_version 47248 (0.0008) -[2024-07-05 13:59:53,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 367050752. Throughput: 0: 12074.6. Samples: 4237756. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:53,192][03359] Avg episode reward: [(0, '50.600')] -[2024-07-05 13:59:54,856][03932] Updated weights for policy 0, policy_version 47258 (0.0008) -[2024-07-05 13:59:56,551][03932] Updated weights for policy 0, policy_version 47268 (0.0010) -[2024-07-05 13:59:58,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48318.9). Total num frames: 367288320. Throughput: 0: 12080.7. Samples: 4310180. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 13:59:58,192][03359] Avg episode reward: [(0, '53.661')] -[2024-07-05 13:59:58,212][03932] Updated weights for policy 0, policy_version 47278 (0.0008) -[2024-07-05 13:59:59,926][03932] Updated weights for policy 0, policy_version 47288 (0.0007) -[2024-07-05 14:00:01,644][03932] Updated weights for policy 0, policy_version 47298 (0.0008) -[2024-07-05 14:00:03,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 367534080. Throughput: 0: 12077.0. Samples: 4382724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:00:03,192][03359] Avg episode reward: [(0, '54.921')] -[2024-07-05 14:00:03,353][03932] Updated weights for policy 0, policy_version 47308 (0.0008) -[2024-07-05 14:00:05,053][03932] Updated weights for policy 0, policy_version 47318 (0.0008) -[2024-07-05 14:00:06,754][03932] Updated weights for policy 0, policy_version 47328 (0.0008) -[2024-07-05 14:00:08,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48318.9). Total num frames: 367771648. Throughput: 0: 12075.0. Samples: 4418652. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:00:08,192][03359] Avg episode reward: [(0, '53.021')] -[2024-07-05 14:00:08,446][03932] Updated weights for policy 0, policy_version 47338 (0.0008) -[2024-07-05 14:00:10,150][03932] Updated weights for policy 0, policy_version 47348 (0.0007) -[2024-07-05 14:00:11,854][03932] Updated weights for policy 0, policy_version 47358 (0.0010) -[2024-07-05 14:00:13,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 368017408. Throughput: 0: 12070.3. Samples: 4490788. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:00:13,192][03359] Avg episode reward: [(0, '52.634')] -[2024-07-05 14:00:13,571][03932] Updated weights for policy 0, policy_version 47368 (0.0008) -[2024-07-05 14:00:15,279][03932] Updated weights for policy 0, policy_version 47378 (0.0008) -[2024-07-05 14:00:16,981][03932] Updated weights for policy 0, policy_version 47388 (0.0009) -[2024-07-05 14:00:18,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.9, 300 sec: 48318.9). Total num frames: 368254976. Throughput: 0: 12053.5. Samples: 4562952. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:00:18,192][03359] Avg episode reward: [(0, '53.568')] -[2024-07-05 14:00:18,679][03932] Updated weights for policy 0, policy_version 47398 (0.0008) -[2024-07-05 14:00:20,369][03932] Updated weights for policy 0, policy_version 47408 (0.0007) -[2024-07-05 14:00:22,103][03932] Updated weights for policy 0, policy_version 47418 (0.0008) -[2024-07-05 14:00:23,191][03359] Fps is (10 sec: 47513.6, 60 sec: 48196.2, 300 sec: 48291.1). Total num frames: 368492544. Throughput: 0: 12055.7. Samples: 4599116. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:00:23,193][03359] Avg episode reward: [(0, '53.971')] -[2024-07-05 14:00:23,766][03932] Updated weights for policy 0, policy_version 47428 (0.0014) -[2024-07-05 14:00:25,458][03932] Updated weights for policy 0, policy_version 47438 (0.0010) -[2024-07-05 14:00:27,160][03932] Updated weights for policy 0, policy_version 47448 (0.0008) -[2024-07-05 14:00:28,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 368738304. Throughput: 0: 12058.1. Samples: 4671780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:00:28,192][03359] Avg episode reward: [(0, '53.363')] -[2024-07-05 14:00:28,849][03932] Updated weights for policy 0, policy_version 47458 (0.0008) -[2024-07-05 14:00:30,540][03932] Updated weights for policy 0, policy_version 47468 (0.0008) -[2024-07-05 14:00:32,254][03932] Updated weights for policy 0, policy_version 47478 (0.0008) -[2024-07-05 14:00:33,191][03359] Fps is (10 sec: 48331.7, 60 sec: 48196.1, 300 sec: 48291.1). Total num frames: 368975872. Throughput: 0: 12067.5. Samples: 4744088. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:00:33,193][03359] Avg episode reward: [(0, '52.754')] -[2024-07-05 14:00:33,950][03932] Updated weights for policy 0, policy_version 47488 (0.0008) -[2024-07-05 14:00:35,590][03932] Updated weights for policy 0, policy_version 47498 (0.0008) -[2024-07-05 14:00:37,267][03932] Updated weights for policy 0, policy_version 47508 (0.0008) -[2024-07-05 14:00:38,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 48291.2). Total num frames: 369221632. Throughput: 0: 12062.7. Samples: 4780576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:00:38,192][03359] Avg episode reward: [(0, '51.583')] -[2024-07-05 14:00:38,999][03932] Updated weights for policy 0, policy_version 47518 (0.0008) -[2024-07-05 14:00:40,720][03932] Updated weights for policy 0, policy_version 47528 (0.0007) -[2024-07-05 14:00:42,424][03932] Updated weights for policy 0, policy_version 47538 (0.0008) -[2024-07-05 14:00:43,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48196.1, 300 sec: 48291.1). Total num frames: 369459200. Throughput: 0: 12056.1. Samples: 4852708. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:00:43,192][03359] Avg episode reward: [(0, '52.681')] -[2024-07-05 14:00:44,111][03932] Updated weights for policy 0, policy_version 47548 (0.0007) -[2024-07-05 14:00:45,840][03932] Updated weights for policy 0, policy_version 47558 (0.0008) -[2024-07-05 14:00:47,528][03932] Updated weights for policy 0, policy_version 47568 (0.0008) -[2024-07-05 14:00:48,191][03359] Fps is (10 sec: 48331.7, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 369704960. Throughput: 0: 12055.8. Samples: 4925236. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:00:48,192][03359] Avg episode reward: [(0, '54.596')] -[2024-07-05 14:00:49,192][03932] Updated weights for policy 0, policy_version 47578 (0.0008) -[2024-07-05 14:00:50,903][03932] Updated weights for policy 0, policy_version 47588 (0.0008) -[2024-07-05 14:00:52,603][03932] Updated weights for policy 0, policy_version 47598 (0.0008) -[2024-07-05 14:00:53,191][03359] Fps is (10 sec: 48333.6, 60 sec: 48196.2, 300 sec: 48291.2). Total num frames: 369942528. Throughput: 0: 12058.1. Samples: 4961268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:00:53,192][03359] Avg episode reward: [(0, '51.943')] -[2024-07-05 14:00:54,281][03932] Updated weights for policy 0, policy_version 47608 (0.0008) -[2024-07-05 14:00:55,986][03932] Updated weights for policy 0, policy_version 47618 (0.0008) -[2024-07-05 14:00:57,679][03932] Updated weights for policy 0, policy_version 47628 (0.0008) -[2024-07-05 14:00:58,191][03359] Fps is (10 sec: 47514.7, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 370180096. Throughput: 0: 12066.0. Samples: 5033756. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:00:58,192][03359] Avg episode reward: [(0, '55.751')] -[2024-07-05 14:00:59,348][03932] Updated weights for policy 0, policy_version 47638 (0.0008) -[2024-07-05 14:01:01,044][03932] Updated weights for policy 0, policy_version 47648 (0.0008) -[2024-07-05 14:01:02,785][03932] Updated weights for policy 0, policy_version 47658 (0.0008) -[2024-07-05 14:01:03,191][03359] Fps is (10 sec: 48333.1, 60 sec: 48196.2, 300 sec: 48291.2). Total num frames: 370425856. Throughput: 0: 12072.5. Samples: 5106212. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:03,192][03359] Avg episode reward: [(0, '53.926')] -[2024-07-05 14:01:04,497][03932] Updated weights for policy 0, policy_version 47668 (0.0008) -[2024-07-05 14:01:06,166][03932] Updated weights for policy 0, policy_version 47678 (0.0010) -[2024-07-05 14:01:07,871][03932] Updated weights for policy 0, policy_version 47688 (0.0008) -[2024-07-05 14:01:08,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 370663424. Throughput: 0: 12072.3. Samples: 5142368. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:08,192][03359] Avg episode reward: [(0, '53.224')] -[2024-07-05 14:01:09,567][03932] Updated weights for policy 0, policy_version 47698 (0.0012) -[2024-07-05 14:01:11,255][03932] Updated weights for policy 0, policy_version 47708 (0.0008) -[2024-07-05 14:01:12,933][03932] Updated weights for policy 0, policy_version 47718 (0.0008) -[2024-07-05 14:01:13,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48291.1). Total num frames: 370909184. Throughput: 0: 12066.8. Samples: 5214788. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:13,192][03359] Avg episode reward: [(0, '55.500')] -[2024-07-05 14:01:14,627][03932] Updated weights for policy 0, policy_version 47728 (0.0007) -[2024-07-05 14:01:16,343][03932] Updated weights for policy 0, policy_version 47738 (0.0011) -[2024-07-05 14:01:18,025][03932] Updated weights for policy 0, policy_version 47748 (0.0012) -[2024-07-05 14:01:18,191][03359] Fps is (10 sec: 48331.5, 60 sec: 48196.1, 300 sec: 48263.3). Total num frames: 371146752. Throughput: 0: 12075.5. Samples: 5287484. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:18,193][03359] Avg episode reward: [(0, '53.437')] -[2024-07-05 14:01:19,702][03932] Updated weights for policy 0, policy_version 47758 (0.0013) -[2024-07-05 14:01:21,434][03932] Updated weights for policy 0, policy_version 47768 (0.0009) -[2024-07-05 14:01:23,151][03932] Updated weights for policy 0, policy_version 47778 (0.0007) -[2024-07-05 14:01:23,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 371392512. Throughput: 0: 12063.0. Samples: 5323412. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:23,192][03359] Avg episode reward: [(0, '54.731')] -[2024-07-05 14:01:24,857][03932] Updated weights for policy 0, policy_version 47788 (0.0008) -[2024-07-05 14:01:26,558][03932] Updated weights for policy 0, policy_version 47798 (0.0007) -[2024-07-05 14:01:28,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 371630080. Throughput: 0: 12071.0. Samples: 5395900. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:28,192][03359] Avg episode reward: [(0, '55.813')] -[2024-07-05 14:01:28,205][03932] Updated weights for policy 0, policy_version 47808 (0.0008) -[2024-07-05 14:01:29,912][03932] Updated weights for policy 0, policy_version 47818 (0.0008) -[2024-07-05 14:01:31,586][03932] Updated weights for policy 0, policy_version 47828 (0.0010) -[2024-07-05 14:01:33,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48333.0, 300 sec: 48291.1). Total num frames: 371875840. Throughput: 0: 12077.0. Samples: 5468700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:33,192][03359] Avg episode reward: [(0, '54.610')] -[2024-07-05 14:01:33,298][03932] Updated weights for policy 0, policy_version 47838 (0.0011) -[2024-07-05 14:01:34,978][03932] Updated weights for policy 0, policy_version 47848 (0.0008) -[2024-07-05 14:01:36,655][03932] Updated weights for policy 0, policy_version 47858 (0.0009) -[2024-07-05 14:01:38,191][03359] Fps is (10 sec: 48333.6, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 372113408. Throughput: 0: 12084.1. Samples: 5505052. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:38,192][03359] Avg episode reward: [(0, '54.638')] -[2024-07-05 14:01:38,379][03932] Updated weights for policy 0, policy_version 47868 (0.0008) -[2024-07-05 14:01:40,070][03932] Updated weights for policy 0, policy_version 47878 (0.0008) -[2024-07-05 14:01:41,739][03932] Updated weights for policy 0, policy_version 47888 (0.0008) -[2024-07-05 14:01:43,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48333.0, 300 sec: 48291.1). Total num frames: 372359168. Throughput: 0: 12077.3. Samples: 5577236. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:01:43,192][03359] Avg episode reward: [(0, '55.108')] -[2024-07-05 14:01:43,197][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000047896_372359168.pth... -[2024-07-05 14:01:43,269][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000046483_360783872.pth -[2024-07-05 14:01:43,490][03932] Updated weights for policy 0, policy_version 47898 (0.0007) -[2024-07-05 14:01:45,160][03932] Updated weights for policy 0, policy_version 47908 (0.0007) -[2024-07-05 14:01:46,860][03932] Updated weights for policy 0, policy_version 47918 (0.0008) -[2024-07-05 14:01:48,192][03359] Fps is (10 sec: 48328.2, 60 sec: 48195.7, 300 sec: 48263.2). Total num frames: 372596736. Throughput: 0: 12081.9. Samples: 5649908. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:01:48,193][03359] Avg episode reward: [(0, '55.658')] -[2024-07-05 14:01:48,559][03932] Updated weights for policy 0, policy_version 47928 (0.0008) -[2024-07-05 14:01:50,265][03932] Updated weights for policy 0, policy_version 47938 (0.0010) -[2024-07-05 14:01:51,985][03932] Updated weights for policy 0, policy_version 47948 (0.0008) -[2024-07-05 14:01:53,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 372842496. Throughput: 0: 12077.0. Samples: 5685836. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:01:53,192][03359] Avg episode reward: [(0, '52.505')] -[2024-07-05 14:01:53,664][03932] Updated weights for policy 0, policy_version 47958 (0.0010) -[2024-07-05 14:01:55,306][03932] Updated weights for policy 0, policy_version 47968 (0.0008) -[2024-07-05 14:01:57,048][03932] Updated weights for policy 0, policy_version 47978 (0.0008) -[2024-07-05 14:01:58,191][03359] Fps is (10 sec: 48336.7, 60 sec: 48332.7, 300 sec: 48263.4). Total num frames: 373080064. Throughput: 0: 12080.0. Samples: 5758388. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:01:58,193][03359] Avg episode reward: [(0, '56.406')] -[2024-07-05 14:01:58,194][03912] Saving new best policy, reward=56.406! -[2024-07-05 14:01:58,750][03932] Updated weights for policy 0, policy_version 47988 (0.0008) -[2024-07-05 14:02:00,464][03932] Updated weights for policy 0, policy_version 47998 (0.0008) -[2024-07-05 14:02:02,143][03932] Updated weights for policy 0, policy_version 48008 (0.0010) -[2024-07-05 14:02:03,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 373325824. Throughput: 0: 12072.3. Samples: 5830736. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:02:03,192][03359] Avg episode reward: [(0, '53.588')] -[2024-07-05 14:02:03,835][03932] Updated weights for policy 0, policy_version 48018 (0.0011) -[2024-07-05 14:02:05,559][03932] Updated weights for policy 0, policy_version 48028 (0.0008) -[2024-07-05 14:02:07,225][03932] Updated weights for policy 0, policy_version 48038 (0.0008) -[2024-07-05 14:02:08,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 373563392. Throughput: 0: 12075.9. Samples: 5866828. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:02:08,192][03359] Avg episode reward: [(0, '51.579')] -[2024-07-05 14:02:08,915][03932] Updated weights for policy 0, policy_version 48048 (0.0011) -[2024-07-05 14:02:10,595][03932] Updated weights for policy 0, policy_version 48058 (0.0008) -[2024-07-05 14:02:12,286][03932] Updated weights for policy 0, policy_version 48068 (0.0009) -[2024-07-05 14:02:13,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 373809152. Throughput: 0: 12081.9. Samples: 5939584. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:02:13,192][03359] Avg episode reward: [(0, '51.128')] -[2024-07-05 14:02:13,969][03932] Updated weights for policy 0, policy_version 48078 (0.0009) -[2024-07-05 14:02:15,685][03932] Updated weights for policy 0, policy_version 48088 (0.0010) -[2024-07-05 14:02:17,425][03932] Updated weights for policy 0, policy_version 48098 (0.0008) -[2024-07-05 14:02:18,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48333.0, 300 sec: 48263.4). Total num frames: 374046720. Throughput: 0: 12070.0. Samples: 6011848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:02:18,192][03359] Avg episode reward: [(0, '52.650')] -[2024-07-05 14:02:19,082][03932] Updated weights for policy 0, policy_version 48108 (0.0008) -[2024-07-05 14:02:20,824][03932] Updated weights for policy 0, policy_version 48118 (0.0008) -[2024-07-05 14:02:22,458][03932] Updated weights for policy 0, policy_version 48128 (0.0008) -[2024-07-05 14:02:23,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 374292480. Throughput: 0: 12073.4. Samples: 6048356. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:02:23,192][03359] Avg episode reward: [(0, '51.042')] -[2024-07-05 14:02:24,150][03932] Updated weights for policy 0, policy_version 48138 (0.0007) -[2024-07-05 14:02:25,864][03932] Updated weights for policy 0, policy_version 48148 (0.0008) -[2024-07-05 14:02:27,574][03932] Updated weights for policy 0, policy_version 48158 (0.0008) -[2024-07-05 14:02:28,191][03359] Fps is (10 sec: 48331.6, 60 sec: 48332.7, 300 sec: 48235.6). Total num frames: 374530048. Throughput: 0: 12079.6. Samples: 6120820. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:02:28,192][03359] Avg episode reward: [(0, '53.056')] -[2024-07-05 14:02:29,277][03932] Updated weights for policy 0, policy_version 48168 (0.0011) -[2024-07-05 14:02:31,018][03932] Updated weights for policy 0, policy_version 48178 (0.0008) -[2024-07-05 14:02:32,705][03932] Updated weights for policy 0, policy_version 48188 (0.0008) -[2024-07-05 14:02:33,191][03359] Fps is (10 sec: 48331.8, 60 sec: 48332.6, 300 sec: 48263.3). Total num frames: 374775808. Throughput: 0: 12070.3. Samples: 6193064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:02:33,192][03359] Avg episode reward: [(0, '51.131')] -[2024-07-05 14:02:34,428][03932] Updated weights for policy 0, policy_version 48198 (0.0008) -[2024-07-05 14:02:36,043][03932] Updated weights for policy 0, policy_version 48208 (0.0012) -[2024-07-05 14:02:37,740][03932] Updated weights for policy 0, policy_version 48218 (0.0007) -[2024-07-05 14:02:38,191][03359] Fps is (10 sec: 48333.7, 60 sec: 48332.7, 300 sec: 48235.6). Total num frames: 375013376. Throughput: 0: 12075.0. Samples: 6229212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:02:38,192][03359] Avg episode reward: [(0, '50.899')] -[2024-07-05 14:02:39,430][03932] Updated weights for policy 0, policy_version 48228 (0.0008) -[2024-07-05 14:02:41,159][03932] Updated weights for policy 0, policy_version 48238 (0.0008) -[2024-07-05 14:02:42,853][03932] Updated weights for policy 0, policy_version 48248 (0.0008) -[2024-07-05 14:02:43,191][03359] Fps is (10 sec: 47514.9, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 375250944. Throughput: 0: 12072.0. Samples: 6301624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:02:43,192][03359] Avg episode reward: [(0, '51.984')] -[2024-07-05 14:02:44,580][03932] Updated weights for policy 0, policy_version 48258 (0.0011) -[2024-07-05 14:02:46,251][03932] Updated weights for policy 0, policy_version 48268 (0.0008) -[2024-07-05 14:02:47,942][03932] Updated weights for policy 0, policy_version 48278 (0.0008) -[2024-07-05 14:02:48,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48333.6, 300 sec: 48263.4). Total num frames: 375496704. Throughput: 0: 12063.7. Samples: 6373600. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:02:48,192][03359] Avg episode reward: [(0, '52.243')] -[2024-07-05 14:02:49,632][03932] Updated weights for policy 0, policy_version 48288 (0.0009) -[2024-07-05 14:02:51,332][03932] Updated weights for policy 0, policy_version 48298 (0.0008) -[2024-07-05 14:02:53,027][03932] Updated weights for policy 0, policy_version 48308 (0.0010) -[2024-07-05 14:02:53,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 375734272. Throughput: 0: 12073.3. Samples: 6410128. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:02:53,192][03359] Avg episode reward: [(0, '52.685')] -[2024-07-05 14:02:54,762][03932] Updated weights for policy 0, policy_version 48318 (0.0008) -[2024-07-05 14:02:56,406][03932] Updated weights for policy 0, policy_version 48328 (0.0008) -[2024-07-05 14:02:58,100][03932] Updated weights for policy 0, policy_version 48338 (0.0007) -[2024-07-05 14:02:58,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 48263.4). Total num frames: 375980032. Throughput: 0: 12069.3. Samples: 6482704. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:02:58,192][03359] Avg episode reward: [(0, '52.296')] -[2024-07-05 14:02:59,790][03932] Updated weights for policy 0, policy_version 48348 (0.0010) -[2024-07-05 14:03:01,494][03932] Updated weights for policy 0, policy_version 48358 (0.0008) -[2024-07-05 14:03:03,173][03932] Updated weights for policy 0, policy_version 48368 (0.0008) -[2024-07-05 14:03:03,191][03359] Fps is (10 sec: 49152.3, 60 sec: 48332.8, 300 sec: 48291.2). Total num frames: 376225792. Throughput: 0: 12074.6. Samples: 6555204. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:03,192][03359] Avg episode reward: [(0, '52.439')] -[2024-07-05 14:03:04,897][03932] Updated weights for policy 0, policy_version 48378 (0.0008) -[2024-07-05 14:03:06,592][03932] Updated weights for policy 0, policy_version 48388 (0.0008) -[2024-07-05 14:03:08,191][03359] Fps is (10 sec: 48332.2, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 376463360. Throughput: 0: 12063.0. Samples: 6591192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:08,192][03359] Avg episode reward: [(0, '52.910')] -[2024-07-05 14:03:08,329][03932] Updated weights for policy 0, policy_version 48398 (0.0007) -[2024-07-05 14:03:10,002][03932] Updated weights for policy 0, policy_version 48408 (0.0008) -[2024-07-05 14:03:11,699][03932] Updated weights for policy 0, policy_version 48418 (0.0009) -[2024-07-05 14:03:13,191][03359] Fps is (10 sec: 48332.2, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 376709120. Throughput: 0: 12067.7. Samples: 6663864. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:13,192][03359] Avg episode reward: [(0, '52.724')] -[2024-07-05 14:03:13,381][03932] Updated weights for policy 0, policy_version 48428 (0.0007) -[2024-07-05 14:03:15,074][03932] Updated weights for policy 0, policy_version 48438 (0.0008) -[2024-07-05 14:03:16,762][03932] Updated weights for policy 0, policy_version 48448 (0.0008) -[2024-07-05 14:03:18,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48332.8, 300 sec: 48291.2). Total num frames: 376946688. Throughput: 0: 12073.8. Samples: 6736380. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:18,192][03359] Avg episode reward: [(0, '54.418')] -[2024-07-05 14:03:18,464][03932] Updated weights for policy 0, policy_version 48458 (0.0010) -[2024-07-05 14:03:20,151][03932] Updated weights for policy 0, policy_version 48468 (0.0008) -[2024-07-05 14:03:21,845][03932] Updated weights for policy 0, policy_version 48478 (0.0008) -[2024-07-05 14:03:23,191][03359] Fps is (10 sec: 47514.0, 60 sec: 48196.3, 300 sec: 48291.1). Total num frames: 377184256. Throughput: 0: 12074.9. Samples: 6772584. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:23,192][03359] Avg episode reward: [(0, '52.540')] -[2024-07-05 14:03:23,539][03932] Updated weights for policy 0, policy_version 48488 (0.0008) -[2024-07-05 14:03:25,207][03932] Updated weights for policy 0, policy_version 48498 (0.0007) -[2024-07-05 14:03:26,925][03932] Updated weights for policy 0, policy_version 48508 (0.0008) -[2024-07-05 14:03:28,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48333.0, 300 sec: 48291.2). Total num frames: 377430016. Throughput: 0: 12077.6. Samples: 6845116. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:28,192][03359] Avg episode reward: [(0, '55.263')] -[2024-07-05 14:03:28,599][03932] Updated weights for policy 0, policy_version 48518 (0.0010) -[2024-07-05 14:03:30,294][03932] Updated weights for policy 0, policy_version 48528 (0.0008) -[2024-07-05 14:03:32,003][03932] Updated weights for policy 0, policy_version 48538 (0.0008) -[2024-07-05 14:03:33,191][03359] Fps is (10 sec: 49152.1, 60 sec: 48333.0, 300 sec: 48318.9). Total num frames: 377675776. Throughput: 0: 12097.7. Samples: 6917996. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:33,192][03359] Avg episode reward: [(0, '56.328')] -[2024-07-05 14:03:33,686][03932] Updated weights for policy 0, policy_version 48548 (0.0009) -[2024-07-05 14:03:35,386][03932] Updated weights for policy 0, policy_version 48558 (0.0011) -[2024-07-05 14:03:37,098][03932] Updated weights for policy 0, policy_version 48568 (0.0008) -[2024-07-05 14:03:38,191][03359] Fps is (10 sec: 48331.9, 60 sec: 48332.7, 300 sec: 48291.5). Total num frames: 377913344. Throughput: 0: 12092.6. Samples: 6954296. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:38,192][03359] Avg episode reward: [(0, '53.248')] -[2024-07-05 14:03:38,766][03932] Updated weights for policy 0, policy_version 48578 (0.0010) -[2024-07-05 14:03:40,459][03932] Updated weights for policy 0, policy_version 48588 (0.0008) -[2024-07-05 14:03:42,155][03932] Updated weights for policy 0, policy_version 48598 (0.0008) -[2024-07-05 14:03:43,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 48318.9). Total num frames: 378159104. Throughput: 0: 12094.8. Samples: 7026968. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:03:43,192][03359] Avg episode reward: [(0, '54.560')] -[2024-07-05 14:03:43,196][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000048604_378159104.pth... -[2024-07-05 14:03:43,268][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000047189_366567424.pth -[2024-07-05 14:03:43,834][03932] Updated weights for policy 0, policy_version 48608 (0.0008) -[2024-07-05 14:03:45,530][03932] Updated weights for policy 0, policy_version 48618 (0.0008) -[2024-07-05 14:03:47,258][03932] Updated weights for policy 0, policy_version 48628 (0.0008) -[2024-07-05 14:03:48,191][03359] Fps is (10 sec: 48333.4, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 378396672. Throughput: 0: 12083.2. Samples: 7098948. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:03:48,192][03359] Avg episode reward: [(0, '53.377')] -[2024-07-05 14:03:48,964][03932] Updated weights for policy 0, policy_version 48638 (0.0008) -[2024-07-05 14:03:50,659][03932] Updated weights for policy 0, policy_version 48648 (0.0008) -[2024-07-05 14:03:52,347][03932] Updated weights for policy 0, policy_version 48658 (0.0008) -[2024-07-05 14:03:53,191][03359] Fps is (10 sec: 48331.6, 60 sec: 48469.2, 300 sec: 48291.1). Total num frames: 378642432. Throughput: 0: 12093.0. Samples: 7135376. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:03:53,192][03359] Avg episode reward: [(0, '54.062')] -[2024-07-05 14:03:54,039][03932] Updated weights for policy 0, policy_version 48668 (0.0008) -[2024-07-05 14:03:55,706][03932] Updated weights for policy 0, policy_version 48678 (0.0009) -[2024-07-05 14:03:57,416][03932] Updated weights for policy 0, policy_version 48688 (0.0008) -[2024-07-05 14:03:58,191][03359] Fps is (10 sec: 48332.1, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 378880000. Throughput: 0: 12085.7. Samples: 7207720. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:03:58,193][03359] Avg episode reward: [(0, '54.103')] -[2024-07-05 14:03:59,099][03932] Updated weights for policy 0, policy_version 48698 (0.0008) -[2024-07-05 14:04:00,779][03932] Updated weights for policy 0, policy_version 48708 (0.0009) -[2024-07-05 14:04:02,485][03932] Updated weights for policy 0, policy_version 48718 (0.0008) -[2024-07-05 14:04:03,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 379125760. Throughput: 0: 12081.0. Samples: 7280028. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:04:03,193][03359] Avg episode reward: [(0, '53.733')] -[2024-07-05 14:04:04,228][03932] Updated weights for policy 0, policy_version 48728 (0.0008) -[2024-07-05 14:04:05,924][03932] Updated weights for policy 0, policy_version 48738 (0.0007) -[2024-07-05 14:04:07,631][03932] Updated weights for policy 0, policy_version 48748 (0.0008) -[2024-07-05 14:04:08,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 379363328. Throughput: 0: 12078.9. Samples: 7316136. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:04:08,192][03359] Avg episode reward: [(0, '54.793')] -[2024-07-05 14:04:09,333][03932] Updated weights for policy 0, policy_version 48758 (0.0007) -[2024-07-05 14:04:11,006][03932] Updated weights for policy 0, policy_version 48768 (0.0008) -[2024-07-05 14:04:12,717][03932] Updated weights for policy 0, policy_version 48778 (0.0008) -[2024-07-05 14:04:13,191][03359] Fps is (10 sec: 47512.8, 60 sec: 48196.2, 300 sec: 48291.1). Total num frames: 379600896. Throughput: 0: 12078.2. Samples: 7388640. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:04:13,193][03359] Avg episode reward: [(0, '56.184')] -[2024-07-05 14:04:14,414][03932] Updated weights for policy 0, policy_version 48788 (0.0008) -[2024-07-05 14:04:16,154][03932] Updated weights for policy 0, policy_version 48798 (0.0008) -[2024-07-05 14:04:17,863][03932] Updated weights for policy 0, policy_version 48808 (0.0008) -[2024-07-05 14:04:18,191][03359] Fps is (10 sec: 47513.8, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 379838464. Throughput: 0: 12068.4. Samples: 7461076. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:04:18,192][03359] Avg episode reward: [(0, '53.661')] -[2024-07-05 14:04:19,538][03932] Updated weights for policy 0, policy_version 48818 (0.0008) -[2024-07-05 14:04:21,197][03932] Updated weights for policy 0, policy_version 48828 (0.0011) -[2024-07-05 14:04:22,935][03932] Updated weights for policy 0, policy_version 48838 (0.0008) -[2024-07-05 14:04:23,191][03359] Fps is (10 sec: 48333.6, 60 sec: 48332.8, 300 sec: 48291.1). Total num frames: 380084224. Throughput: 0: 12060.3. Samples: 7497008. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:04:23,192][03359] Avg episode reward: [(0, '55.404')] -[2024-07-05 14:04:24,635][03932] Updated weights for policy 0, policy_version 48848 (0.0008) -[2024-07-05 14:04:26,358][03932] Updated weights for policy 0, policy_version 48858 (0.0007) -[2024-07-05 14:04:28,057][03932] Updated weights for policy 0, policy_version 48868 (0.0010) -[2024-07-05 14:04:28,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 380321792. Throughput: 0: 12048.3. Samples: 7569144. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:04:28,192][03359] Avg episode reward: [(0, '51.768')] -[2024-07-05 14:04:29,753][03932] Updated weights for policy 0, policy_version 48878 (0.0008) -[2024-07-05 14:04:31,449][03932] Updated weights for policy 0, policy_version 48888 (0.0008) -[2024-07-05 14:04:33,130][03932] Updated weights for policy 0, policy_version 48898 (0.0008) -[2024-07-05 14:04:33,191][03359] Fps is (10 sec: 48332.2, 60 sec: 48196.1, 300 sec: 48291.1). Total num frames: 380567552. Throughput: 0: 12050.5. Samples: 7641224. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:04:33,192][03359] Avg episode reward: [(0, '53.736')] -[2024-07-05 14:04:34,835][03932] Updated weights for policy 0, policy_version 48908 (0.0008) -[2024-07-05 14:04:36,524][03932] Updated weights for policy 0, policy_version 48918 (0.0008) -[2024-07-05 14:04:38,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48196.4, 300 sec: 48263.4). Total num frames: 380805120. Throughput: 0: 12047.4. Samples: 7677508. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:04:38,192][03359] Avg episode reward: [(0, '52.093')] -[2024-07-05 14:04:38,198][03932] Updated weights for policy 0, policy_version 48928 (0.0007) -[2024-07-05 14:04:39,898][03932] Updated weights for policy 0, policy_version 48938 (0.0008) -[2024-07-05 14:04:41,618][03932] Updated weights for policy 0, policy_version 48948 (0.0008) -[2024-07-05 14:04:43,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48196.2, 300 sec: 48291.2). Total num frames: 381050880. Throughput: 0: 12049.2. Samples: 7749932. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:04:43,193][03359] Avg episode reward: [(0, '52.560')] -[2024-07-05 14:04:43,335][03932] Updated weights for policy 0, policy_version 48958 (0.0008) -[2024-07-05 14:04:45,014][03932] Updated weights for policy 0, policy_version 48968 (0.0008) -[2024-07-05 14:04:46,732][03932] Updated weights for policy 0, policy_version 48978 (0.0007) -[2024-07-05 14:04:48,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 381288448. Throughput: 0: 12045.5. Samples: 7822076. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:04:48,193][03359] Avg episode reward: [(0, '55.607')] -[2024-07-05 14:04:48,416][03932] Updated weights for policy 0, policy_version 48988 (0.0010) -[2024-07-05 14:04:50,108][03932] Updated weights for policy 0, policy_version 48998 (0.0008) -[2024-07-05 14:04:51,806][03932] Updated weights for policy 0, policy_version 49008 (0.0008) -[2024-07-05 14:04:53,191][03359] Fps is (10 sec: 47513.6, 60 sec: 48059.9, 300 sec: 48263.4). Total num frames: 381526016. Throughput: 0: 12044.7. Samples: 7858148. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:04:53,192][03359] Avg episode reward: [(0, '54.841')] -[2024-07-05 14:04:53,526][03932] Updated weights for policy 0, policy_version 49018 (0.0011) -[2024-07-05 14:04:55,215][03932] Updated weights for policy 0, policy_version 49028 (0.0008) -[2024-07-05 14:04:56,927][03932] Updated weights for policy 0, policy_version 49038 (0.0008) -[2024-07-05 14:04:58,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48196.4, 300 sec: 48263.4). Total num frames: 381771776. Throughput: 0: 12042.9. Samples: 7930568. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:04:58,192][03359] Avg episode reward: [(0, '56.017')] -[2024-07-05 14:04:58,627][03932] Updated weights for policy 0, policy_version 49048 (0.0008) -[2024-07-05 14:05:00,321][03932] Updated weights for policy 0, policy_version 49058 (0.0008) -[2024-07-05 14:05:02,028][03932] Updated weights for policy 0, policy_version 49068 (0.0008) -[2024-07-05 14:05:03,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48059.8, 300 sec: 48263.4). Total num frames: 382009344. Throughput: 0: 12037.0. Samples: 8002740. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:05:03,192][03359] Avg episode reward: [(0, '56.578')] -[2024-07-05 14:05:03,197][03912] Saving new best policy, reward=56.578! -[2024-07-05 14:05:03,751][03932] Updated weights for policy 0, policy_version 49078 (0.0011) -[2024-07-05 14:05:05,401][03932] Updated weights for policy 0, policy_version 49088 (0.0010) -[2024-07-05 14:05:07,170][03932] Updated weights for policy 0, policy_version 49098 (0.0008) -[2024-07-05 14:05:08,191][03359] Fps is (10 sec: 47513.5, 60 sec: 48059.8, 300 sec: 48235.6). Total num frames: 382246912. Throughput: 0: 12038.5. Samples: 8038740. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:05:08,192][03359] Avg episode reward: [(0, '51.510')] -[2024-07-05 14:05:08,878][03932] Updated weights for policy 0, policy_version 49108 (0.0008) -[2024-07-05 14:05:10,535][03932] Updated weights for policy 0, policy_version 49118 (0.0008) -[2024-07-05 14:05:12,256][03932] Updated weights for policy 0, policy_version 49128 (0.0008) -[2024-07-05 14:05:13,191][03359] Fps is (10 sec: 48331.4, 60 sec: 48196.2, 300 sec: 48263.3). Total num frames: 382492672. Throughput: 0: 12037.4. Samples: 8110828. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:05:13,193][03359] Avg episode reward: [(0, '54.981')] -[2024-07-05 14:05:13,922][03932] Updated weights for policy 0, policy_version 49138 (0.0008) -[2024-07-05 14:05:15,619][03932] Updated weights for policy 0, policy_version 49148 (0.0008) -[2024-07-05 14:05:17,311][03932] Updated weights for policy 0, policy_version 49158 (0.0008) -[2024-07-05 14:05:18,191][03359] Fps is (10 sec: 49151.1, 60 sec: 48332.7, 300 sec: 48291.1). Total num frames: 382738432. Throughput: 0: 12053.9. Samples: 8183652. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:05:18,192][03359] Avg episode reward: [(0, '54.426')] -[2024-07-05 14:05:19,030][03932] Updated weights for policy 0, policy_version 49168 (0.0008) -[2024-07-05 14:05:20,713][03932] Updated weights for policy 0, policy_version 49178 (0.0008) -[2024-07-05 14:05:22,393][03932] Updated weights for policy 0, policy_version 49188 (0.0008) -[2024-07-05 14:05:23,191][03359] Fps is (10 sec: 48333.6, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 382976000. Throughput: 0: 12051.9. Samples: 8219844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:05:23,192][03359] Avg episode reward: [(0, '53.954')] -[2024-07-05 14:05:24,112][03932] Updated weights for policy 0, policy_version 49198 (0.0007) -[2024-07-05 14:05:25,842][03932] Updated weights for policy 0, policy_version 49208 (0.0009) -[2024-07-05 14:05:27,544][03932] Updated weights for policy 0, policy_version 49218 (0.0008) -[2024-07-05 14:05:28,191][03359] Fps is (10 sec: 47514.6, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 383213568. Throughput: 0: 12055.0. Samples: 8292408. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:05:28,192][03359] Avg episode reward: [(0, '54.789')] -[2024-07-05 14:05:29,221][03932] Updated weights for policy 0, policy_version 49228 (0.0008) -[2024-07-05 14:05:30,911][03932] Updated weights for policy 0, policy_version 49238 (0.0012) -[2024-07-05 14:05:32,595][03932] Updated weights for policy 0, policy_version 49248 (0.0009) -[2024-07-05 14:05:33,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48196.4, 300 sec: 48263.4). Total num frames: 383459328. Throughput: 0: 12054.5. Samples: 8364528. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:05:33,192][03359] Avg episode reward: [(0, '51.787')] -[2024-07-05 14:05:34,310][03932] Updated weights for policy 0, policy_version 49258 (0.0008) -[2024-07-05 14:05:36,024][03932] Updated weights for policy 0, policy_version 49268 (0.0008) -[2024-07-05 14:05:37,735][03932] Updated weights for policy 0, policy_version 49278 (0.0008) -[2024-07-05 14:05:38,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 383696896. Throughput: 0: 12055.0. Samples: 8400624. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:05:38,192][03359] Avg episode reward: [(0, '56.194')] -[2024-07-05 14:05:39,424][03932] Updated weights for policy 0, policy_version 49288 (0.0009) -[2024-07-05 14:05:41,160][03932] Updated weights for policy 0, policy_version 49298 (0.0008) -[2024-07-05 14:05:42,881][03932] Updated weights for policy 0, policy_version 49308 (0.0011) -[2024-07-05 14:05:43,191][03359] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48235.6). Total num frames: 383934464. Throughput: 0: 12050.0. Samples: 8472816. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:05:43,192][03359] Avg episode reward: [(0, '55.061')] -[2024-07-05 14:05:43,215][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000049310_383942656.pth... -[2024-07-05 14:05:43,287][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000047896_372359168.pth -[2024-07-05 14:05:44,559][03932] Updated weights for policy 0, policy_version 49318 (0.0008) -[2024-07-05 14:05:46,280][03932] Updated weights for policy 0, policy_version 49328 (0.0008) -[2024-07-05 14:05:47,965][03932] Updated weights for policy 0, policy_version 49338 (0.0007) -[2024-07-05 14:05:48,192][03359] Fps is (10 sec: 48331.2, 60 sec: 48196.0, 300 sec: 48263.3). Total num frames: 384180224. Throughput: 0: 12041.1. Samples: 8544592. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 14:05:48,193][03359] Avg episode reward: [(0, '52.303')] -[2024-07-05 14:05:49,676][03932] Updated weights for policy 0, policy_version 49348 (0.0008) -[2024-07-05 14:05:51,386][03932] Updated weights for policy 0, policy_version 49358 (0.0011) -[2024-07-05 14:05:53,073][03932] Updated weights for policy 0, policy_version 49368 (0.0010) -[2024-07-05 14:05:53,191][03359] Fps is (10 sec: 48331.7, 60 sec: 48196.1, 300 sec: 48263.3). Total num frames: 384417792. Throughput: 0: 12032.6. Samples: 8580208. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:05:53,192][03359] Avg episode reward: [(0, '53.861')] -[2024-07-05 14:05:54,742][03932] Updated weights for policy 0, policy_version 49378 (0.0010) -[2024-07-05 14:05:56,462][03932] Updated weights for policy 0, policy_version 49388 (0.0008) -[2024-07-05 14:05:58,191][03359] Fps is (10 sec: 47514.7, 60 sec: 48059.7, 300 sec: 48235.6). Total num frames: 384655360. Throughput: 0: 12048.5. Samples: 8653008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:05:58,193][03359] Avg episode reward: [(0, '54.906')] -[2024-07-05 14:05:58,221][03932] Updated weights for policy 0, policy_version 49398 (0.0010) -[2024-07-05 14:05:59,938][03932] Updated weights for policy 0, policy_version 49408 (0.0008) -[2024-07-05 14:06:01,623][03932] Updated weights for policy 0, policy_version 49418 (0.0010) -[2024-07-05 14:06:03,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48196.1, 300 sec: 48263.3). Total num frames: 384901120. Throughput: 0: 12023.9. Samples: 8724728. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:06:03,192][03359] Avg episode reward: [(0, '54.455')] -[2024-07-05 14:06:03,282][03932] Updated weights for policy 0, policy_version 49428 (0.0007) -[2024-07-05 14:06:05,006][03932] Updated weights for policy 0, policy_version 49438 (0.0008) -[2024-07-05 14:06:06,710][03932] Updated weights for policy 0, policy_version 49448 (0.0008) -[2024-07-05 14:06:08,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 385138688. Throughput: 0: 12026.6. Samples: 8761040. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:06:08,192][03359] Avg episode reward: [(0, '54.165')] -[2024-07-05 14:06:08,421][03932] Updated weights for policy 0, policy_version 49458 (0.0008) -[2024-07-05 14:06:10,098][03932] Updated weights for policy 0, policy_version 49468 (0.0008) -[2024-07-05 14:06:11,786][03932] Updated weights for policy 0, policy_version 49478 (0.0010) -[2024-07-05 14:06:13,191][03359] Fps is (10 sec: 48333.8, 60 sec: 48196.5, 300 sec: 48263.4). Total num frames: 385384448. Throughput: 0: 12020.4. Samples: 8833324. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:06:13,192][03359] Avg episode reward: [(0, '54.241')] -[2024-07-05 14:06:13,472][03932] Updated weights for policy 0, policy_version 49488 (0.0008) -[2024-07-05 14:06:15,199][03932] Updated weights for policy 0, policy_version 49498 (0.0012) -[2024-07-05 14:06:16,887][03932] Updated weights for policy 0, policy_version 49508 (0.0008) -[2024-07-05 14:06:18,191][03359] Fps is (10 sec: 48332.0, 60 sec: 48059.8, 300 sec: 48235.6). Total num frames: 385622016. Throughput: 0: 12022.3. Samples: 8905532. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:06:18,192][03359] Avg episode reward: [(0, '55.841')] -[2024-07-05 14:06:18,615][03932] Updated weights for policy 0, policy_version 49518 (0.0007) -[2024-07-05 14:06:20,303][03932] Updated weights for policy 0, policy_version 49528 (0.0008) -[2024-07-05 14:06:21,998][03932] Updated weights for policy 0, policy_version 49538 (0.0008) -[2024-07-05 14:06:23,191][03359] Fps is (10 sec: 47513.3, 60 sec: 48059.8, 300 sec: 48235.6). Total num frames: 385859584. Throughput: 0: 12022.0. Samples: 8941616. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 14:06:23,192][03359] Avg episode reward: [(0, '54.725')] -[2024-07-05 14:06:23,673][03932] Updated weights for policy 0, policy_version 49548 (0.0008) -[2024-07-05 14:06:25,336][03932] Updated weights for policy 0, policy_version 49558 (0.0008) -[2024-07-05 14:06:27,067][03932] Updated weights for policy 0, policy_version 49568 (0.0010) -[2024-07-05 14:06:28,191][03359] Fps is (10 sec: 48333.7, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 386105344. Throughput: 0: 12029.0. Samples: 9014120. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 14:06:28,192][03359] Avg episode reward: [(0, '54.076')] -[2024-07-05 14:06:28,783][03932] Updated weights for policy 0, policy_version 49578 (0.0007) -[2024-07-05 14:06:30,454][03932] Updated weights for policy 0, policy_version 49588 (0.0008) -[2024-07-05 14:06:32,155][03932] Updated weights for policy 0, policy_version 49598 (0.0008) -[2024-07-05 14:06:33,191][03359] Fps is (10 sec: 49152.1, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 386351104. Throughput: 0: 12048.5. Samples: 9086772. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 14:06:33,192][03359] Avg episode reward: [(0, '53.956')] -[2024-07-05 14:06:33,843][03932] Updated weights for policy 0, policy_version 49608 (0.0007) -[2024-07-05 14:06:35,529][03932] Updated weights for policy 0, policy_version 49618 (0.0008) -[2024-07-05 14:06:37,263][03932] Updated weights for policy 0, policy_version 49628 (0.0010) -[2024-07-05 14:06:38,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 386588672. Throughput: 0: 12062.3. Samples: 9123008. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 14:06:38,192][03359] Avg episode reward: [(0, '55.075')] -[2024-07-05 14:06:38,936][03932] Updated weights for policy 0, policy_version 49638 (0.0008) -[2024-07-05 14:06:40,621][03932] Updated weights for policy 0, policy_version 49648 (0.0009) -[2024-07-05 14:06:42,305][03932] Updated weights for policy 0, policy_version 49658 (0.0008) -[2024-07-05 14:06:43,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48263.5). Total num frames: 386834432. Throughput: 0: 12061.0. Samples: 9195752. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 14:06:43,192][03359] Avg episode reward: [(0, '53.518')] -[2024-07-05 14:06:44,006][03932] Updated weights for policy 0, policy_version 49668 (0.0010) -[2024-07-05 14:06:45,747][03932] Updated weights for policy 0, policy_version 49678 (0.0012) -[2024-07-05 14:06:47,438][03932] Updated weights for policy 0, policy_version 49688 (0.0007) -[2024-07-05 14:06:48,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48196.5, 300 sec: 48235.6). Total num frames: 387072000. Throughput: 0: 12064.6. Samples: 9267632. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) -[2024-07-05 14:06:48,192][03359] Avg episode reward: [(0, '54.545')] -[2024-07-05 14:06:49,190][03932] Updated weights for policy 0, policy_version 49698 (0.0007) -[2024-07-05 14:06:50,876][03932] Updated weights for policy 0, policy_version 49708 (0.0008) -[2024-07-05 14:06:52,503][03932] Updated weights for policy 0, policy_version 49718 (0.0008) -[2024-07-05 14:06:53,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48333.0, 300 sec: 48263.4). Total num frames: 387317760. Throughput: 0: 12062.5. Samples: 9303852. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:06:53,192][03359] Avg episode reward: [(0, '49.505')] -[2024-07-05 14:06:54,229][03932] Updated weights for policy 0, policy_version 49728 (0.0010) -[2024-07-05 14:06:55,919][03932] Updated weights for policy 0, policy_version 49738 (0.0008) -[2024-07-05 14:06:57,622][03932] Updated weights for policy 0, policy_version 49748 (0.0009) -[2024-07-05 14:06:58,191][03359] Fps is (10 sec: 48333.1, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 387555328. Throughput: 0: 12063.7. Samples: 9376192. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:06:58,192][03359] Avg episode reward: [(0, '50.946')] -[2024-07-05 14:06:59,318][03932] Updated weights for policy 0, policy_version 49758 (0.0008) -[2024-07-05 14:07:01,025][03932] Updated weights for policy 0, policy_version 49768 (0.0008) -[2024-07-05 14:07:02,737][03932] Updated weights for policy 0, policy_version 49778 (0.0009) -[2024-07-05 14:07:03,191][03359] Fps is (10 sec: 47513.5, 60 sec: 48196.4, 300 sec: 48235.6). Total num frames: 387792896. Throughput: 0: 12065.8. Samples: 9448492. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:07:03,192][03359] Avg episode reward: [(0, '53.327')] -[2024-07-05 14:07:04,413][03932] Updated weights for policy 0, policy_version 49788 (0.0008) -[2024-07-05 14:07:06,135][03932] Updated weights for policy 0, policy_version 49798 (0.0008) -[2024-07-05 14:07:07,857][03932] Updated weights for policy 0, policy_version 49808 (0.0012) -[2024-07-05 14:07:08,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 388038656. Throughput: 0: 12064.8. Samples: 9484532. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:07:08,192][03359] Avg episode reward: [(0, '52.401')] -[2024-07-05 14:07:09,542][03932] Updated weights for policy 0, policy_version 49818 (0.0010) -[2024-07-05 14:07:11,235][03932] Updated weights for policy 0, policy_version 49828 (0.0008) -[2024-07-05 14:07:12,963][03932] Updated weights for policy 0, policy_version 49838 (0.0008) -[2024-07-05 14:07:13,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 388276224. Throughput: 0: 12058.2. Samples: 9556740. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:07:13,192][03359] Avg episode reward: [(0, '54.895')] -[2024-07-05 14:07:14,640][03932] Updated weights for policy 0, policy_version 49848 (0.0008) -[2024-07-05 14:07:16,343][03932] Updated weights for policy 0, policy_version 49858 (0.0008) -[2024-07-05 14:07:18,019][03932] Updated weights for policy 0, policy_version 49868 (0.0011) -[2024-07-05 14:07:18,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48333.0, 300 sec: 48235.6). Total num frames: 388521984. Throughput: 0: 12054.8. Samples: 9629236. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:07:18,192][03359] Avg episode reward: [(0, '55.879')] -[2024-07-05 14:07:19,738][03932] Updated weights for policy 0, policy_version 49878 (0.0010) -[2024-07-05 14:07:21,425][03932] Updated weights for policy 0, policy_version 49888 (0.0011) -[2024-07-05 14:07:23,116][03932] Updated weights for policy 0, policy_version 49898 (0.0007) -[2024-07-05 14:07:23,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 388759552. Throughput: 0: 12050.4. Samples: 9665276. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:23,192][03359] Avg episode reward: [(0, '52.981')] -[2024-07-05 14:07:24,820][03932] Updated weights for policy 0, policy_version 49908 (0.0010) -[2024-07-05 14:07:26,511][03932] Updated weights for policy 0, policy_version 49918 (0.0008) -[2024-07-05 14:07:28,191][03359] Fps is (10 sec: 47513.4, 60 sec: 48196.2, 300 sec: 48207.9). Total num frames: 388997120. Throughput: 0: 12044.0. Samples: 9737732. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:28,192][03359] Avg episode reward: [(0, '53.090')] -[2024-07-05 14:07:28,206][03932] Updated weights for policy 0, policy_version 49928 (0.0009) -[2024-07-05 14:07:29,911][03932] Updated weights for policy 0, policy_version 49938 (0.0010) -[2024-07-05 14:07:31,607][03932] Updated weights for policy 0, policy_version 49948 (0.0008) -[2024-07-05 14:07:33,191][03359] Fps is (10 sec: 48332.0, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 389242880. Throughput: 0: 12056.4. Samples: 9810172. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:33,192][03359] Avg episode reward: [(0, '55.501')] -[2024-07-05 14:07:33,282][03932] Updated weights for policy 0, policy_version 49958 (0.0008) -[2024-07-05 14:07:35,004][03932] Updated weights for policy 0, policy_version 49968 (0.0007) -[2024-07-05 14:07:36,702][03932] Updated weights for policy 0, policy_version 49978 (0.0007) -[2024-07-05 14:07:38,191][03359] Fps is (10 sec: 49152.7, 60 sec: 48332.9, 300 sec: 48263.4). Total num frames: 389488640. Throughput: 0: 12062.3. Samples: 9846656. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:38,192][03359] Avg episode reward: [(0, '54.055')] -[2024-07-05 14:07:38,400][03932] Updated weights for policy 0, policy_version 49988 (0.0008) -[2024-07-05 14:07:40,100][03932] Updated weights for policy 0, policy_version 49998 (0.0008) -[2024-07-05 14:07:41,785][03932] Updated weights for policy 0, policy_version 50008 (0.0008) -[2024-07-05 14:07:43,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 389726208. Throughput: 0: 12061.9. Samples: 9918980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:43,192][03359] Avg episode reward: [(0, '54.643')] -[2024-07-05 14:07:43,196][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000050016_389726208.pth... -[2024-07-05 14:07:43,266][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000048604_378159104.pth -[2024-07-05 14:07:43,468][03932] Updated weights for policy 0, policy_version 50018 (0.0012) -[2024-07-05 14:07:45,180][03932] Updated weights for policy 0, policy_version 50028 (0.0009) -[2024-07-05 14:07:46,896][03932] Updated weights for policy 0, policy_version 50038 (0.0008) -[2024-07-05 14:07:48,191][03359] Fps is (10 sec: 47513.4, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 389963776. Throughput: 0: 12067.3. Samples: 9991520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:48,192][03359] Avg episode reward: [(0, '54.551')] -[2024-07-05 14:07:48,552][03932] Updated weights for policy 0, policy_version 50048 (0.0009) -[2024-07-05 14:07:50,256][03932] Updated weights for policy 0, policy_version 50058 (0.0008) -[2024-07-05 14:07:51,938][03932] Updated weights for policy 0, policy_version 50068 (0.0008) -[2024-07-05 14:07:53,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 390209536. Throughput: 0: 12068.0. Samples: 10027592. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:07:53,192][03359] Avg episode reward: [(0, '53.829')] -[2024-07-05 14:07:53,633][03932] Updated weights for policy 0, policy_version 50078 (0.0007) -[2024-07-05 14:07:55,336][03932] Updated weights for policy 0, policy_version 50088 (0.0008) -[2024-07-05 14:07:57,031][03932] Updated weights for policy 0, policy_version 50098 (0.0008) -[2024-07-05 14:07:58,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 390447104. Throughput: 0: 12075.0. Samples: 10100116. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:07:58,192][03359] Avg episode reward: [(0, '54.250')] -[2024-07-05 14:07:58,742][03932] Updated weights for policy 0, policy_version 50108 (0.0008) -[2024-07-05 14:08:00,387][03932] Updated weights for policy 0, policy_version 50118 (0.0011) -[2024-07-05 14:08:02,098][03932] Updated weights for policy 0, policy_version 50128 (0.0008) -[2024-07-05 14:08:03,191][03359] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 390692864. Throughput: 0: 12079.0. Samples: 10172792. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:08:03,192][03359] Avg episode reward: [(0, '51.123')] -[2024-07-05 14:08:03,770][03932] Updated weights for policy 0, policy_version 50138 (0.0010) -[2024-07-05 14:08:05,491][03932] Updated weights for policy 0, policy_version 50148 (0.0008) -[2024-07-05 14:08:07,213][03932] Updated weights for policy 0, policy_version 50158 (0.0010) -[2024-07-05 14:08:08,191][03359] Fps is (10 sec: 48331.5, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 390930432. Throughput: 0: 12086.1. Samples: 10209152. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:08:08,192][03359] Avg episode reward: [(0, '53.406')] -[2024-07-05 14:08:08,902][03932] Updated weights for policy 0, policy_version 50168 (0.0009) -[2024-07-05 14:08:10,638][03932] Updated weights for policy 0, policy_version 50178 (0.0008) -[2024-07-05 14:08:12,307][03932] Updated weights for policy 0, policy_version 50188 (0.0008) -[2024-07-05 14:08:13,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 391176192. Throughput: 0: 12080.4. Samples: 10281348. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:08:13,192][03359] Avg episode reward: [(0, '54.415')] -[2024-07-05 14:08:13,965][03932] Updated weights for policy 0, policy_version 50198 (0.0010) -[2024-07-05 14:08:15,659][03932] Updated weights for policy 0, policy_version 50208 (0.0007) -[2024-07-05 14:08:17,356][03932] Updated weights for policy 0, policy_version 50218 (0.0008) -[2024-07-05 14:08:18,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 391413760. Throughput: 0: 12081.3. Samples: 10353832. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:08:18,192][03359] Avg episode reward: [(0, '53.722')] -[2024-07-05 14:08:19,058][03932] Updated weights for policy 0, policy_version 50228 (0.0011) -[2024-07-05 14:08:20,757][03932] Updated weights for policy 0, policy_version 50238 (0.0008) -[2024-07-05 14:08:22,462][03932] Updated weights for policy 0, policy_version 50248 (0.0008) -[2024-07-05 14:08:23,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 391659520. Throughput: 0: 12068.8. Samples: 10389752. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:08:23,192][03359] Avg episode reward: [(0, '52.594')] -[2024-07-05 14:08:24,168][03932] Updated weights for policy 0, policy_version 50258 (0.0008) -[2024-07-05 14:08:25,831][03932] Updated weights for policy 0, policy_version 50268 (0.0010) -[2024-07-05 14:08:27,546][03932] Updated weights for policy 0, policy_version 50278 (0.0008) -[2024-07-05 14:08:28,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48207.8). Total num frames: 391897088. Throughput: 0: 12077.8. Samples: 10462480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:08:28,192][03359] Avg episode reward: [(0, '53.421')] -[2024-07-05 14:08:29,247][03932] Updated weights for policy 0, policy_version 50288 (0.0008) -[2024-07-05 14:08:30,956][03932] Updated weights for policy 0, policy_version 50298 (0.0007) -[2024-07-05 14:08:32,674][03932] Updated weights for policy 0, policy_version 50308 (0.0007) -[2024-07-05 14:08:33,191][03359] Fps is (10 sec: 48333.2, 60 sec: 48333.0, 300 sec: 48235.6). Total num frames: 392142848. Throughput: 0: 12071.6. Samples: 10534740. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:08:33,192][03359] Avg episode reward: [(0, '52.755')] -[2024-07-05 14:08:34,360][03932] Updated weights for policy 0, policy_version 50318 (0.0008) -[2024-07-05 14:08:36,117][03932] Updated weights for policy 0, policy_version 50328 (0.0010) -[2024-07-05 14:08:37,792][03932] Updated weights for policy 0, policy_version 50338 (0.0010) -[2024-07-05 14:08:38,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 392380416. Throughput: 0: 12065.9. Samples: 10570556. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:08:38,193][03359] Avg episode reward: [(0, '53.262')] -[2024-07-05 14:08:39,472][03932] Updated weights for policy 0, policy_version 50348 (0.0009) -[2024-07-05 14:08:41,173][03932] Updated weights for policy 0, policy_version 50358 (0.0008) -[2024-07-05 14:08:42,846][03932] Updated weights for policy 0, policy_version 50368 (0.0008) -[2024-07-05 14:08:43,191][03359] Fps is (10 sec: 48332.3, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 392626176. Throughput: 0: 12070.5. Samples: 10643288. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:08:43,192][03359] Avg episode reward: [(0, '51.672')] -[2024-07-05 14:08:44,573][03932] Updated weights for policy 0, policy_version 50378 (0.0008) -[2024-07-05 14:08:46,279][03932] Updated weights for policy 0, policy_version 50388 (0.0009) -[2024-07-05 14:08:47,988][03932] Updated weights for policy 0, policy_version 50398 (0.0008) -[2024-07-05 14:08:48,191][03359] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48207.9). Total num frames: 392863744. Throughput: 0: 12056.6. Samples: 10715340. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:08:48,192][03359] Avg episode reward: [(0, '51.907')] -[2024-07-05 14:08:49,719][03932] Updated weights for policy 0, policy_version 50408 (0.0008) -[2024-07-05 14:08:51,385][03932] Updated weights for policy 0, policy_version 50418 (0.0008) -[2024-07-05 14:08:53,067][03932] Updated weights for policy 0, policy_version 50428 (0.0013) -[2024-07-05 14:08:53,191][03359] Fps is (10 sec: 47513.8, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 393101312. Throughput: 0: 12045.7. Samples: 10751204. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:08:53,192][03359] Avg episode reward: [(0, '53.512')] -[2024-07-05 14:08:54,736][03932] Updated weights for policy 0, policy_version 50438 (0.0008) -[2024-07-05 14:08:56,428][03932] Updated weights for policy 0, policy_version 50448 (0.0008) -[2024-07-05 14:08:58,153][03932] Updated weights for policy 0, policy_version 50458 (0.0010) -[2024-07-05 14:08:58,191][03359] Fps is (10 sec: 48331.6, 60 sec: 48332.6, 300 sec: 48207.8). Total num frames: 393347072. Throughput: 0: 12053.5. Samples: 10823760. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:08:58,193][03359] Avg episode reward: [(0, '51.288')] -[2024-07-05 14:08:59,864][03932] Updated weights for policy 0, policy_version 50468 (0.0008) -[2024-07-05 14:09:01,558][03932] Updated weights for policy 0, policy_version 50478 (0.0013) -[2024-07-05 14:09:03,192][03359] Fps is (10 sec: 48330.4, 60 sec: 48195.9, 300 sec: 48207.8). Total num frames: 393584640. Throughput: 0: 12053.3. Samples: 10896236. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:09:03,193][03359] Avg episode reward: [(0, '50.242')] -[2024-07-05 14:09:03,219][03932] Updated weights for policy 0, policy_version 50488 (0.0011) -[2024-07-05 14:09:04,926][03932] Updated weights for policy 0, policy_version 50498 (0.0009) -[2024-07-05 14:09:06,659][03932] Updated weights for policy 0, policy_version 50508 (0.0008) -[2024-07-05 14:09:08,191][03359] Fps is (10 sec: 47514.0, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 393822208. Throughput: 0: 12062.5. Samples: 10932568. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:09:08,192][03359] Avg episode reward: [(0, '52.859')] -[2024-07-05 14:09:08,315][03932] Updated weights for policy 0, policy_version 50518 (0.0008) -[2024-07-05 14:09:10,029][03932] Updated weights for policy 0, policy_version 50528 (0.0009) -[2024-07-05 14:09:11,712][03932] Updated weights for policy 0, policy_version 50538 (0.0008) -[2024-07-05 14:09:13,191][03359] Fps is (10 sec: 48334.7, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 394067968. Throughput: 0: 12049.8. Samples: 11004720. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:09:13,192][03359] Avg episode reward: [(0, '53.131')] -[2024-07-05 14:09:13,450][03932] Updated weights for policy 0, policy_version 50548 (0.0009) -[2024-07-05 14:09:15,141][03932] Updated weights for policy 0, policy_version 50558 (0.0009) -[2024-07-05 14:09:16,853][03932] Updated weights for policy 0, policy_version 50568 (0.0008) -[2024-07-05 14:09:18,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 394305536. Throughput: 0: 12060.2. Samples: 11077448. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:09:18,192][03359] Avg episode reward: [(0, '54.414')] -[2024-07-05 14:09:18,540][03932] Updated weights for policy 0, policy_version 50578 (0.0008) -[2024-07-05 14:09:20,243][03932] Updated weights for policy 0, policy_version 50588 (0.0011) -[2024-07-05 14:09:21,955][03932] Updated weights for policy 0, policy_version 50598 (0.0007) -[2024-07-05 14:09:23,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 394551296. Throughput: 0: 12061.0. Samples: 11113304. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:09:23,192][03359] Avg episode reward: [(0, '54.868')] -[2024-07-05 14:09:23,672][03932] Updated weights for policy 0, policy_version 50608 (0.0009) -[2024-07-05 14:09:25,386][03932] Updated weights for policy 0, policy_version 50618 (0.0008) -[2024-07-05 14:09:27,054][03932] Updated weights for policy 0, policy_version 50628 (0.0008) -[2024-07-05 14:09:28,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 394788864. Throughput: 0: 12050.2. Samples: 11185548. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:28,193][03359] Avg episode reward: [(0, '53.338')] -[2024-07-05 14:09:28,739][03932] Updated weights for policy 0, policy_version 50638 (0.0009) -[2024-07-05 14:09:30,414][03932] Updated weights for policy 0, policy_version 50648 (0.0009) -[2024-07-05 14:09:32,102][03932] Updated weights for policy 0, policy_version 50658 (0.0009) -[2024-07-05 14:09:33,191][03359] Fps is (10 sec: 48333.5, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 395034624. Throughput: 0: 12055.1. Samples: 11257820. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:33,192][03359] Avg episode reward: [(0, '52.075')] -[2024-07-05 14:09:33,797][03932] Updated weights for policy 0, policy_version 50668 (0.0008) -[2024-07-05 14:09:35,503][03932] Updated weights for policy 0, policy_version 50678 (0.0009) -[2024-07-05 14:09:37,195][03932] Updated weights for policy 0, policy_version 50688 (0.0007) -[2024-07-05 14:09:38,191][03359] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 395272192. Throughput: 0: 12071.4. Samples: 11294416. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:38,192][03359] Avg episode reward: [(0, '52.250')] -[2024-07-05 14:09:38,903][03932] Updated weights for policy 0, policy_version 50698 (0.0008) -[2024-07-05 14:09:40,625][03932] Updated weights for policy 0, policy_version 50708 (0.0008) -[2024-07-05 14:09:42,327][03932] Updated weights for policy 0, policy_version 50718 (0.0008) -[2024-07-05 14:09:43,191][03359] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 395517952. Throughput: 0: 12061.5. Samples: 11366524. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:43,192][03359] Avg episode reward: [(0, '51.838')] -[2024-07-05 14:09:43,197][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000050723_395517952.pth... -[2024-07-05 14:09:43,265][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000049310_383942656.pth -[2024-07-05 14:09:44,060][03932] Updated weights for policy 0, policy_version 50728 (0.0008) -[2024-07-05 14:09:45,732][03932] Updated weights for policy 0, policy_version 50738 (0.0008) -[2024-07-05 14:09:47,422][03932] Updated weights for policy 0, policy_version 50748 (0.0008) -[2024-07-05 14:09:48,191][03359] Fps is (10 sec: 48332.5, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 395755520. Throughput: 0: 12052.0. Samples: 11438572. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:48,192][03359] Avg episode reward: [(0, '52.358')] -[2024-07-05 14:09:49,135][03932] Updated weights for policy 0, policy_version 50758 (0.0008) -[2024-07-05 14:09:50,856][03932] Updated weights for policy 0, policy_version 50768 (0.0009) -[2024-07-05 14:09:52,544][03932] Updated weights for policy 0, policy_version 50778 (0.0008) -[2024-07-05 14:09:53,191][03359] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 395993088. Throughput: 0: 12044.0. Samples: 11474548. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:53,192][03359] Avg episode reward: [(0, '51.904')] -[2024-07-05 14:09:54,242][03932] Updated weights for policy 0, policy_version 50788 (0.0008) -[2024-07-05 14:09:55,943][03932] Updated weights for policy 0, policy_version 50798 (0.0007) -[2024-07-05 14:09:57,621][03932] Updated weights for policy 0, policy_version 50808 (0.0008) -[2024-07-05 14:09:58,191][03359] Fps is (10 sec: 48332.9, 60 sec: 48196.4, 300 sec: 48235.6). Total num frames: 396238848. Throughput: 0: 12054.9. Samples: 11547188. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:09:58,192][03359] Avg episode reward: [(0, '52.445')] -[2024-07-05 14:09:59,339][03932] Updated weights for policy 0, policy_version 50818 (0.0008) -[2024-07-05 14:10:01,040][03932] Updated weights for policy 0, policy_version 50828 (0.0008) -[2024-07-05 14:10:02,730][03932] Updated weights for policy 0, policy_version 50838 (0.0010) -[2024-07-05 14:10:03,191][03359] Fps is (10 sec: 48332.4, 60 sec: 48196.6, 300 sec: 48235.6). Total num frames: 396476416. Throughput: 0: 12040.9. Samples: 11619288. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:10:03,192][03359] Avg episode reward: [(0, '53.765')] -[2024-07-05 14:10:04,444][03932] Updated weights for policy 0, policy_version 50848 (0.0008) -[2024-07-05 14:10:06,135][03932] Updated weights for policy 0, policy_version 50858 (0.0008) -[2024-07-05 14:10:07,783][03932] Updated weights for policy 0, policy_version 50868 (0.0008) -[2024-07-05 14:10:08,191][03359] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 396722176. Throughput: 0: 12051.1. Samples: 11655600. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:10:08,192][03359] Avg episode reward: [(0, '53.275')] -[2024-07-05 14:10:09,490][03932] Updated weights for policy 0, policy_version 50878 (0.0008) -[2024-07-05 14:10:11,207][03932] Updated weights for policy 0, policy_version 50888 (0.0009) -[2024-07-05 14:10:12,908][03932] Updated weights for policy 0, policy_version 50898 (0.0007) -[2024-07-05 14:10:13,191][03359] Fps is (10 sec: 48331.5, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 396959744. Throughput: 0: 12054.1. Samples: 11727988. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:10:13,193][03359] Avg episode reward: [(0, '54.238')] -[2024-07-05 14:10:14,619][03932] Updated weights for policy 0, policy_version 50908 (0.0008) -[2024-07-05 14:10:16,323][03932] Updated weights for policy 0, policy_version 50918 (0.0008) -[2024-07-05 14:10:18,074][03932] Updated weights for policy 0, policy_version 50928 (0.0008) -[2024-07-05 14:10:18,191][03359] Fps is (10 sec: 47513.1, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 397197312. Throughput: 0: 12041.8. Samples: 11799704. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:10:18,192][03359] Avg episode reward: [(0, '53.260')] -[2024-07-05 14:10:19,741][03932] Updated weights for policy 0, policy_version 50938 (0.0009) -[2024-07-05 14:10:21,384][03932] Updated weights for policy 0, policy_version 50948 (0.0007) -[2024-07-05 14:10:23,043][03932] Updated weights for policy 0, policy_version 50958 (0.0009) -[2024-07-05 14:10:23,191][03359] Fps is (10 sec: 48334.5, 60 sec: 48196.4, 300 sec: 48235.6). Total num frames: 397443072. Throughput: 0: 12050.6. Samples: 11836692. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:10:23,192][03359] Avg episode reward: [(0, '51.507')] -[2024-07-05 14:10:24,666][03932] Updated weights for policy 0, policy_version 50968 (0.0007) -[2024-07-05 14:10:26,344][03932] Updated weights for policy 0, policy_version 50978 (0.0007) -[2024-07-05 14:10:28,038][03932] Updated weights for policy 0, policy_version 50988 (0.0008) -[2024-07-05 14:10:28,191][03359] Fps is (10 sec: 49971.6, 60 sec: 48469.4, 300 sec: 48263.4). Total num frames: 397697024. Throughput: 0: 12093.5. Samples: 11910732. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:10:28,192][03359] Avg episode reward: [(0, '54.220')] -[2024-07-05 14:10:29,686][03932] Updated weights for policy 0, policy_version 50998 (0.0007) -[2024-07-05 14:10:31,324][03932] Updated weights for policy 0, policy_version 51008 (0.0008) -[2024-07-05 14:10:32,971][03932] Updated weights for policy 0, policy_version 51018 (0.0007) -[2024-07-05 14:10:33,191][03359] Fps is (10 sec: 49970.8, 60 sec: 48469.3, 300 sec: 48291.1). Total num frames: 397942784. Throughput: 0: 12142.9. Samples: 11985004. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:10:33,192][03359] Avg episode reward: [(0, '52.295')] -[2024-07-05 14:10:34,671][03932] Updated weights for policy 0, policy_version 51028 (0.0008) -[2024-07-05 14:10:36,311][03932] Updated weights for policy 0, policy_version 51038 (0.0009) -[2024-07-05 14:10:37,967][03932] Updated weights for policy 0, policy_version 51048 (0.0008) -[2024-07-05 14:10:38,191][03359] Fps is (10 sec: 49152.2, 60 sec: 48605.8, 300 sec: 48318.9). Total num frames: 398188544. Throughput: 0: 12166.3. Samples: 12022032. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:10:38,192][03359] Avg episode reward: [(0, '51.731')] -[2024-07-05 14:10:39,631][03932] Updated weights for policy 0, policy_version 51058 (0.0007) -[2024-07-05 14:10:41,314][03932] Updated weights for policy 0, policy_version 51068 (0.0009) -[2024-07-05 14:10:42,953][03932] Updated weights for policy 0, policy_version 51078 (0.0007) -[2024-07-05 14:10:43,191][03359] Fps is (10 sec: 49152.2, 60 sec: 48605.9, 300 sec: 48319.0). Total num frames: 398434304. Throughput: 0: 12188.1. Samples: 12095652. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:10:43,192][03359] Avg episode reward: [(0, '52.947')] -[2024-07-05 14:10:44,620][03932] Updated weights for policy 0, policy_version 51088 (0.0010) -[2024-07-05 14:10:46,301][03932] Updated weights for policy 0, policy_version 51098 (0.0008) -[2024-07-05 14:10:47,961][03932] Updated weights for policy 0, policy_version 51108 (0.0008) -[2024-07-05 14:10:48,192][03359] Fps is (10 sec: 49149.1, 60 sec: 48741.9, 300 sec: 48346.6). Total num frames: 398680064. Throughput: 0: 12225.5. Samples: 12169440. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:10:48,193][03359] Avg episode reward: [(0, '54.952')] -[2024-07-05 14:10:49,642][03932] Updated weights for policy 0, policy_version 51118 (0.0007) -[2024-07-05 14:10:51,318][03932] Updated weights for policy 0, policy_version 51128 (0.0010) -[2024-07-05 14:10:52,998][03932] Updated weights for policy 0, policy_version 51138 (0.0010) -[2024-07-05 14:10:53,191][03359] Fps is (10 sec: 49152.2, 60 sec: 48878.9, 300 sec: 48374.5). Total num frames: 398925824. Throughput: 0: 12236.6. Samples: 12206248. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:10:53,192][03359] Avg episode reward: [(0, '53.282')] -[2024-07-05 14:10:54,658][03932] Updated weights for policy 0, policy_version 51148 (0.0007) -[2024-07-05 14:10:56,272][03932] Updated weights for policy 0, policy_version 51158 (0.0007) -[2024-07-05 14:10:57,980][03932] Updated weights for policy 0, policy_version 51168 (0.0010) -[2024-07-05 14:10:58,191][03359] Fps is (10 sec: 49155.0, 60 sec: 48879.0, 300 sec: 48374.5). Total num frames: 399171584. Throughput: 0: 12267.9. Samples: 12280040. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:10:58,192][03359] Avg episode reward: [(0, '54.242')] -[2024-07-05 14:10:59,623][03932] Updated weights for policy 0, policy_version 51178 (0.0007) -[2024-07-05 14:11:01,322][03932] Updated weights for policy 0, policy_version 51188 (0.0008) -[2024-07-05 14:11:02,977][03932] Updated weights for policy 0, policy_version 51198 (0.0008) -[2024-07-05 14:11:03,191][03359] Fps is (10 sec: 49151.3, 60 sec: 49015.4, 300 sec: 48402.2). Total num frames: 399417344. Throughput: 0: 12310.5. Samples: 12353676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:11:03,192][03359] Avg episode reward: [(0, '56.669')] -[2024-07-05 14:11:03,198][03912] Saving new best policy, reward=56.669! -[2024-07-05 14:11:04,624][03932] Updated weights for policy 0, policy_version 51208 (0.0008) -[2024-07-05 14:11:06,293][03932] Updated weights for policy 0, policy_version 51218 (0.0008) -[2024-07-05 14:11:07,946][03932] Updated weights for policy 0, policy_version 51228 (0.0007) -[2024-07-05 14:11:08,191][03359] Fps is (10 sec: 49152.0, 60 sec: 49015.5, 300 sec: 48402.2). Total num frames: 399663104. Throughput: 0: 12310.1. Samples: 12390648. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:11:08,192][03359] Avg episode reward: [(0, '52.456')] -[2024-07-05 14:11:09,636][03932] Updated weights for policy 0, policy_version 51238 (0.0008) -[2024-07-05 14:11:11,331][03932] Updated weights for policy 0, policy_version 51248 (0.0008) -[2024-07-05 14:11:12,991][03932] Updated weights for policy 0, policy_version 51258 (0.0009) -[2024-07-05 14:11:13,191][03359] Fps is (10 sec: 49152.5, 60 sec: 49152.3, 300 sec: 48430.0). Total num frames: 399908864. Throughput: 0: 12295.8. Samples: 12464044. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:11:13,192][03359] Avg episode reward: [(0, '54.256')] -[2024-07-05 14:11:14,627][03932] Updated weights for policy 0, policy_version 51268 (0.0007) -[2024-07-05 14:11:15,288][03912] Stopping Batcher_0... -[2024-07-05 14:11:15,288][03912] Loop batcher_evt_loop terminating... -[2024-07-05 14:11:15,289][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051272_400015360.pth... -[2024-07-05 14:11:15,288][03359] Component Batcher_0 stopped! -[2024-07-05 14:11:15,317][03935] Stopping RolloutWorker_w3... -[2024-07-05 14:11:15,317][03935] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 14:11:15,317][03359] Component RolloutWorker_w3 stopped! -[2024-07-05 14:11:15,318][03937] Stopping RolloutWorker_w4... -[2024-07-05 14:11:15,318][03937] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 14:11:15,318][03962] Stopping RolloutWorker_w14... -[2024-07-05 14:11:15,318][03934] Stopping RolloutWorker_w1... -[2024-07-05 14:11:15,318][03359] Component RolloutWorker_w4 stopped! -[2024-07-05 14:11:15,318][03962] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 14:11:15,319][03934] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 14:11:15,318][03933] Stopping RolloutWorker_w0... -[2024-07-05 14:11:15,319][03933] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 14:11:15,319][03963] Stopping RolloutWorker_w15... -[2024-07-05 14:11:15,319][03963] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 14:11:15,320][03940] Stopping RolloutWorker_w7... -[2024-07-05 14:11:15,320][03940] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 14:11:15,320][03941] Stopping RolloutWorker_w8... -[2024-07-05 14:11:15,320][03938] Stopping RolloutWorker_w5... -[2024-07-05 14:11:15,320][03938] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 14:11:15,320][03941] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 14:11:15,321][03939] Stopping RolloutWorker_w6... -[2024-07-05 14:11:15,321][03939] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 14:11:15,321][03951] Stopping RolloutWorker_w12... -[2024-07-05 14:11:15,322][03951] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 14:11:15,324][03936] Stopping RolloutWorker_w2... -[2024-07-05 14:11:15,321][03359] Component RolloutWorker_w14 stopped! -[2024-07-05 14:11:15,325][03936] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 14:11:15,325][03359] Component RolloutWorker_w1 stopped! -[2024-07-05 14:11:15,326][03359] Component RolloutWorker_w0 stopped! -[2024-07-05 14:11:15,326][03359] Component RolloutWorker_w15 stopped! -[2024-07-05 14:11:15,327][03359] Component RolloutWorker_w7 stopped! -[2024-07-05 14:11:15,327][03359] Component RolloutWorker_w8 stopped! -[2024-07-05 14:11:15,327][03359] Component RolloutWorker_w5 stopped! -[2024-07-05 14:11:15,328][03359] Component RolloutWorker_w12 stopped! -[2024-07-05 14:11:15,329][03359] Component RolloutWorker_w6 stopped! -[2024-07-05 14:11:15,329][03359] Component RolloutWorker_w2 stopped! -[2024-07-05 14:11:15,330][03359] Component RolloutWorker_w9 stopped! -[2024-07-05 14:11:15,330][03942] Stopping RolloutWorker_w9... -[2024-07-05 14:11:15,333][03942] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 14:11:15,334][03943] Stopping RolloutWorker_w10... -[2024-07-05 14:11:15,335][03943] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 14:11:15,334][03359] Component RolloutWorker_w10 stopped! -[2024-07-05 14:11:15,347][03944] Stopping RolloutWorker_w11... -[2024-07-05 14:11:15,347][03944] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 14:11:15,347][03359] Component RolloutWorker_w11 stopped! -[2024-07-05 14:11:15,355][03932] Weights refcount: 2 0 -[2024-07-05 14:11:15,357][03932] Stopping InferenceWorker_p0-w0... -[2024-07-05 14:11:15,357][03932] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 14:11:15,357][03359] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 14:11:15,364][03961] Stopping RolloutWorker_w13... -[2024-07-05 14:11:15,364][03961] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 14:11:15,364][03359] Component RolloutWorker_w13 stopped! -[2024-07-05 14:11:15,387][03912] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000050016_389726208.pth -[2024-07-05 14:11:15,396][03912] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051272_400015360.pth... -[2024-07-05 14:11:15,500][03912] Stopping LearnerWorker_p0... -[2024-07-05 14:11:15,500][03912] Loop learner_proc0_evt_loop terminating... -[2024-07-05 14:11:15,500][03359] Component LearnerWorker_p0 stopped! -[2024-07-05 14:11:15,501][03359] Waiting for process learner_proc0 to stop... -[2024-07-05 14:11:16,733][03359] Waiting for process inference_proc0-0 to join... -[2024-07-05 14:11:16,735][03359] Waiting for process rollout_proc0 to join... -[2024-07-05 14:11:16,735][03359] Waiting for process rollout_proc1 to join... -[2024-07-05 14:11:16,736][03359] Waiting for process rollout_proc2 to join... -[2024-07-05 14:11:16,736][03359] Waiting for process rollout_proc3 to join... -[2024-07-05 14:11:16,737][03359] Waiting for process rollout_proc4 to join... -[2024-07-05 14:11:16,737][03359] Waiting for process rollout_proc5 to join... -[2024-07-05 14:11:16,737][03359] Waiting for process rollout_proc6 to join... -[2024-07-05 14:11:16,737][03359] Waiting for process rollout_proc7 to join... -[2024-07-05 14:11:16,738][03359] Waiting for process rollout_proc8 to join... -[2024-07-05 14:11:16,738][03359] Waiting for process rollout_proc9 to join... -[2024-07-05 14:11:16,738][03359] Waiting for process rollout_proc10 to join... -[2024-07-05 14:11:16,778][03359] Waiting for process rollout_proc11 to join... -[2024-07-05 14:11:16,779][03359] Waiting for process rollout_proc12 to join... -[2024-07-05 14:11:16,780][03359] Waiting for process rollout_proc13 to join... -[2024-07-05 14:11:16,781][03359] Waiting for process rollout_proc14 to join... -[2024-07-05 14:11:16,781][03359] Waiting for process rollout_proc15 to join... -[2024-07-05 14:11:16,781][03359] Batcher 0 profile tree view: -batching: 74.6262, releasing_batches: 0.1417 -[2024-07-05 14:11:16,781][03359] InferenceWorker_p0-w0 profile tree view: +[2024-07-05 10:30:32,991][19499] Using optimizer +[2024-07-05 10:30:33,485][19499] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001224_5013504.pth... +[2024-07-05 10:30:33,518][19499] Loading model from checkpoint +[2024-07-05 10:30:33,519][19499] Loaded experiment state at self.train_step=1224, self.env_steps=5013504 +[2024-07-05 10:30:33,519][19499] Initialized policy 0 weights for model version 1224 +[2024-07-05 10:30:33,521][19499] LearnerWorker_p0 finished initialization! +[2024-07-05 10:30:33,521][19499] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:30:33,579][19513] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:30:33,580][19513] RunningMeanStd input shape: (1,) +[2024-07-05 10:30:33,587][19513] Num input channels: 3 +[2024-07-05 10:30:33,597][19513] Convolutional layer output size: 4608 +[2024-07-05 10:30:33,608][19513] Policy head output size: 512 +[2024-07-05 10:30:33,735][17621] Inference worker 0-0 is ready! +[2024-07-05 10:30:33,736][17621] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 10:30:33,775][19518] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,776][19520] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,776][19516] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,777][19515] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,778][19514] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,778][19519] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,779][19512] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:33,779][19517] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:30:34,279][19512] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,280][19515] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,282][19519] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,282][19520] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,282][19518] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,282][19516] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,458][19520] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,458][19518] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,458][19512] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,459][19516] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,459][19519] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,650][19514] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,650][19517] Decorrelating experience for 0 frames... +[2024-07-05 10:30:34,683][19512] Decorrelating experience for 64 frames... +[2024-07-05 10:30:34,684][19519] Decorrelating experience for 64 frames... +[2024-07-05 10:30:34,684][19516] Decorrelating experience for 64 frames... +[2024-07-05 10:30:34,685][19520] Decorrelating experience for 64 frames... +[2024-07-05 10:30:34,818][19514] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,818][19517] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,855][19515] Decorrelating experience for 32 frames... +[2024-07-05 10:30:34,884][19520] Decorrelating experience for 96 frames... +[2024-07-05 10:30:34,889][19518] Decorrelating experience for 64 frames... +[2024-07-05 10:30:34,993][17621] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 5013504. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:30:35,035][19512] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,040][19517] Decorrelating experience for 64 frames... +[2024-07-05 10:30:35,040][19514] Decorrelating experience for 64 frames... +[2024-07-05 10:30:35,077][19516] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,079][19515] Decorrelating experience for 64 frames... +[2024-07-05 10:30:35,083][19518] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,229][19514] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,267][19515] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,275][19519] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,296][19517] Decorrelating experience for 96 frames... +[2024-07-05 10:30:35,962][19499] Signal inference workers to stop experience collection... +[2024-07-05 10:30:35,968][19513] InferenceWorker_p0-w0: stopping experience collection +[2024-07-05 10:30:39,343][19499] Signal inference workers to resume experience collection... +[2024-07-05 10:30:39,343][19513] InferenceWorker_p0-w0: resuming experience collection +[2024-07-05 10:30:39,993][17621] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 5017600. Throughput: 0: 594.4. Samples: 2972. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-07-05 10:30:39,994][17621] Avg episode reward: [(0, '2.544')] +[2024-07-05 10:30:43,823][19513] Updated weights for policy 0, policy_version 1234 (0.0105) +[2024-07-05 10:30:44,993][17621] Fps is (10 sec: 4915.2, 60 sec: 4915.2, 300 sec: 4915.2). Total num frames: 5062656. Throughput: 0: 864.6. Samples: 8646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:30:44,994][17621] Avg episode reward: [(0, '15.281')] +[2024-07-05 10:30:48,295][17621] Heartbeat connected on Batcher_0 +[2024-07-05 10:30:48,306][17621] Heartbeat connected on RolloutWorker_w0 +[2024-07-05 10:30:48,309][17621] Heartbeat connected on RolloutWorker_w1 +[2024-07-05 10:30:48,312][17621] Heartbeat connected on RolloutWorker_w2 +[2024-07-05 10:30:48,313][17621] Heartbeat connected on InferenceWorker_p0-w0 +[2024-07-05 10:30:48,315][17621] Heartbeat connected on RolloutWorker_w3 +[2024-07-05 10:30:48,318][17621] Heartbeat connected on RolloutWorker_w4 +[2024-07-05 10:30:48,321][17621] Heartbeat connected on RolloutWorker_w5 +[2024-07-05 10:30:48,326][17621] Heartbeat connected on RolloutWorker_w6 +[2024-07-05 10:30:48,327][17621] Heartbeat connected on RolloutWorker_w7 +[2024-07-05 10:30:48,452][17621] Heartbeat connected on LearnerWorker_p0 +[2024-07-05 10:30:48,454][19513] Updated weights for policy 0, policy_version 1244 (0.0016) +[2024-07-05 10:30:49,993][17621] Fps is (10 sec: 9011.1, 60 sec: 6280.4, 300 sec: 6280.4). Total num frames: 5107712. Throughput: 0: 1459.6. Samples: 21894. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:30:49,995][17621] Avg episode reward: [(0, '23.001')] +[2024-07-05 10:30:53,121][19513] Updated weights for policy 0, policy_version 1254 (0.0016) +[2024-07-05 10:30:54,993][17621] Fps is (10 sec: 9011.0, 60 sec: 6963.1, 300 sec: 6963.1). Total num frames: 5152768. Throughput: 0: 1756.3. Samples: 35126. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:30:54,994][17621] Avg episode reward: [(0, '27.101')] +[2024-07-05 10:30:57,801][19513] Updated weights for policy 0, policy_version 1264 (0.0017) +[2024-07-05 10:30:59,993][17621] Fps is (10 sec: 8601.8, 60 sec: 7209.0, 300 sec: 7209.0). Total num frames: 5193728. Throughput: 0: 1662.2. Samples: 41556. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:30:59,994][17621] Avg episode reward: [(0, '30.378')] +[2024-07-05 10:31:02,475][19513] Updated weights for policy 0, policy_version 1274 (0.0017) +[2024-07-05 10:31:04,993][17621] Fps is (10 sec: 8601.8, 60 sec: 7509.4, 300 sec: 7509.4). Total num frames: 5238784. Throughput: 0: 1826.3. Samples: 54788. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:04,994][17621] Avg episode reward: [(0, '29.593')] +[2024-07-05 10:31:07,112][19513] Updated weights for policy 0, policy_version 1284 (0.0015) +[2024-07-05 10:31:09,993][17621] Fps is (10 sec: 9011.2, 60 sec: 7723.9, 300 sec: 7723.9). Total num frames: 5283840. Throughput: 0: 1941.8. Samples: 67964. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:31:09,994][17621] Avg episode reward: [(0, '29.305')] +[2024-07-05 10:31:11,805][19513] Updated weights for policy 0, policy_version 1294 (0.0014) +[2024-07-05 10:31:14,993][17621] Fps is (10 sec: 8601.5, 60 sec: 7782.4, 300 sec: 7782.4). Total num frames: 5324800. Throughput: 0: 1861.8. Samples: 74474. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:14,994][17621] Avg episode reward: [(0, '29.975')] +[2024-07-05 10:31:16,469][19513] Updated weights for policy 0, policy_version 1304 (0.0015) +[2024-07-05 10:31:19,993][17621] Fps is (10 sec: 8601.6, 60 sec: 7918.9, 300 sec: 7918.9). Total num frames: 5369856. Throughput: 0: 1947.7. Samples: 87648. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:19,994][17621] Avg episode reward: [(0, '28.994')] +[2024-07-05 10:31:21,158][19513] Updated weights for policy 0, policy_version 1314 (0.0016) +[2024-07-05 10:31:24,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8028.2, 300 sec: 8028.2). Total num frames: 5414912. Throughput: 0: 2173.0. Samples: 100756. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:24,994][17621] Avg episode reward: [(0, '31.462')] +[2024-07-05 10:31:24,997][19499] Saving new best policy, reward=31.462! +[2024-07-05 10:31:25,902][19513] Updated weights for policy 0, policy_version 1324 (0.0015) +[2024-07-05 10:31:29,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8043.0, 300 sec: 8043.0). Total num frames: 5455872. Throughput: 0: 2189.6. Samples: 107180. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:29,994][17621] Avg episode reward: [(0, '31.718')] +[2024-07-05 10:31:30,143][19499] Saving new best policy, reward=31.718! +[2024-07-05 10:31:30,634][19513] Updated weights for policy 0, policy_version 1334 (0.0017) +[2024-07-05 10:31:34,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8123.7, 300 sec: 8123.7). Total num frames: 5500928. Throughput: 0: 2187.3. Samples: 120324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:34,994][17621] Avg episode reward: [(0, '32.623')] +[2024-07-05 10:31:35,323][19499] Saving new best policy, reward=32.623! +[2024-07-05 10:31:35,325][19513] Updated weights for policy 0, policy_version 1344 (0.0016) +[2024-07-05 10:31:39,993][17621] Fps is (10 sec: 8601.7, 60 sec: 8738.2, 300 sec: 8129.0). Total num frames: 5541888. Throughput: 0: 2182.2. Samples: 133324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:39,994][17621] Avg episode reward: [(0, '30.621')] +[2024-07-05 10:31:40,092][19513] Updated weights for policy 0, policy_version 1354 (0.0016) +[2024-07-05 10:31:44,774][19513] Updated weights for policy 0, policy_version 1364 (0.0015) +[2024-07-05 10:31:44,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8738.1, 300 sec: 8192.0). Total num frames: 5586944. Throughput: 0: 2182.2. Samples: 139754. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:31:44,994][17621] Avg episode reward: [(0, '28.763')] +[2024-07-05 10:31:49,537][19513] Updated weights for policy 0, policy_version 1374 (0.0015) +[2024-07-05 10:31:49,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8192.0). Total num frames: 5627904. Throughput: 0: 2180.1. Samples: 152894. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:31:49,994][17621] Avg episode reward: [(0, '29.604')] +[2024-07-05 10:31:54,525][19513] Updated weights for policy 0, policy_version 1384 (0.0019) +[2024-07-05 10:31:54,993][17621] Fps is (10 sec: 8191.9, 60 sec: 8601.6, 300 sec: 8192.0). Total num frames: 5668864. Throughput: 0: 2161.5. Samples: 165230. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:31:54,994][17621] Avg episode reward: [(0, '30.372')] +[2024-07-05 10:31:59,509][19513] Updated weights for policy 0, policy_version 1394 (0.0016) +[2024-07-05 10:31:59,993][17621] Fps is (10 sec: 8192.0, 60 sec: 8601.6, 300 sec: 8192.0). Total num frames: 5709824. Throughput: 0: 2152.9. Samples: 171352. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:31:59,994][17621] Avg episode reward: [(0, '30.498')] +[2024-07-05 10:32:04,383][19513] Updated weights for policy 0, policy_version 1404 (0.0014) +[2024-07-05 10:32:04,993][17621] Fps is (10 sec: 8601.7, 60 sec: 8601.6, 300 sec: 8237.5). Total num frames: 5754880. Throughput: 0: 2138.5. Samples: 183882. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:04,994][17621] Avg episode reward: [(0, '29.607')] +[2024-07-05 10:32:09,137][19513] Updated weights for policy 0, policy_version 1414 (0.0014) +[2024-07-05 10:32:09,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8533.3, 300 sec: 8235.1). Total num frames: 5795840. Throughput: 0: 2136.6. Samples: 196904. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:09,995][17621] Avg episode reward: [(0, '30.198')] +[2024-07-05 10:32:13,865][19513] Updated weights for policy 0, policy_version 1424 (0.0013) +[2024-07-05 10:32:14,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8601.6, 300 sec: 8273.9). Total num frames: 5840896. Throughput: 0: 2135.2. Samples: 203266. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:14,995][17621] Avg episode reward: [(0, '31.204')] +[2024-07-05 10:32:18,562][19513] Updated weights for policy 0, policy_version 1434 (0.0013) +[2024-07-05 10:32:19,993][17621] Fps is (10 sec: 9011.4, 60 sec: 8601.6, 300 sec: 8309.0). Total num frames: 5885952. Throughput: 0: 2136.9. Samples: 216482. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:19,994][17621] Avg episode reward: [(0, '30.875')] +[2024-07-05 10:32:23,238][19513] Updated weights for policy 0, policy_version 1444 (0.0013) +[2024-07-05 10:32:24,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8303.7). Total num frames: 5926912. Throughput: 0: 2141.5. Samples: 229692. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:24,994][17621] Avg episode reward: [(0, '28.836')] +[2024-07-05 10:32:25,079][19499] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001448_5931008.pth... +[2024-07-05 10:32:25,174][19499] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001222_5005312.pth +[2024-07-05 10:32:27,897][19513] Updated weights for policy 0, policy_version 1454 (0.0014) +[2024-07-05 10:32:29,994][17621] Fps is (10 sec: 8601.0, 60 sec: 8601.5, 300 sec: 8334.4). Total num frames: 5971968. Throughput: 0: 2141.4. Samples: 236120. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:29,995][17621] Avg episode reward: [(0, '29.113')] +[2024-07-05 10:32:32,596][19513] Updated weights for policy 0, policy_version 1464 (0.0013) +[2024-07-05 10:32:34,993][17621] Fps is (10 sec: 9011.4, 60 sec: 8601.6, 300 sec: 8362.7). Total num frames: 6017024. Throughput: 0: 2140.8. Samples: 249228. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:34,994][17621] Avg episode reward: [(0, '30.351')] +[2024-07-05 10:32:37,298][19513] Updated weights for policy 0, policy_version 1474 (0.0013) +[2024-07-05 10:32:39,993][17621] Fps is (10 sec: 8602.1, 60 sec: 8601.6, 300 sec: 8355.8). Total num frames: 6057984. Throughput: 0: 2157.4. Samples: 262312. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:39,994][17621] Avg episode reward: [(0, '29.981')] +[2024-07-05 10:32:42,067][19513] Updated weights for policy 0, policy_version 1484 (0.0014) +[2024-07-05 10:32:44,999][17621] Fps is (10 sec: 8596.4, 60 sec: 8600.7, 300 sec: 8380.7). Total num frames: 6103040. Throughput: 0: 2162.9. Samples: 268694. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:45,012][17621] Avg episode reward: [(0, '28.907')] +[2024-07-05 10:32:46,764][19513] Updated weights for policy 0, policy_version 1494 (0.0014) +[2024-07-05 10:32:49,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8601.6, 300 sec: 8374.0). Total num frames: 6144000. Throughput: 0: 2176.8. Samples: 281838. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:49,994][17621] Avg episode reward: [(0, '26.770')] +[2024-07-05 10:32:51,448][19513] Updated weights for policy 0, policy_version 1504 (0.0014) +[2024-07-05 10:32:54,993][17621] Fps is (10 sec: 8606.9, 60 sec: 8669.9, 300 sec: 8396.8). Total num frames: 6189056. Throughput: 0: 2172.1. Samples: 294650. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:32:54,994][17621] Avg episode reward: [(0, '28.208')] +[2024-07-05 10:32:56,204][19513] Updated weights for policy 0, policy_version 1514 (0.0014) +[2024-07-05 10:32:59,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8389.7). Total num frames: 6230016. Throughput: 0: 2178.7. Samples: 301306. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:32:59,994][17621] Avg episode reward: [(0, '30.529')] +[2024-07-05 10:33:00,985][19513] Updated weights for policy 0, policy_version 1524 (0.0013) +[2024-07-05 10:33:04,993][17621] Fps is (10 sec: 8601.4, 60 sec: 8669.9, 300 sec: 8410.4). Total num frames: 6275072. Throughput: 0: 2166.9. Samples: 313994. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:04,994][17621] Avg episode reward: [(0, '32.731')] +[2024-07-05 10:33:05,271][19499] Saving new best policy, reward=32.731! +[2024-07-05 10:33:05,767][19513] Updated weights for policy 0, policy_version 1534 (0.0014) +[2024-07-05 10:33:09,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8738.2, 300 sec: 8429.8). Total num frames: 6320128. Throughput: 0: 2164.4. Samples: 327090. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:09,994][17621] Avg episode reward: [(0, '32.906')] +[2024-07-05 10:33:09,995][19499] Saving new best policy, reward=32.906! +[2024-07-05 10:33:10,511][19513] Updated weights for policy 0, policy_version 1544 (0.0014) +[2024-07-05 10:33:14,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8422.4). Total num frames: 6361088. Throughput: 0: 2163.0. Samples: 333454. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:14,995][17621] Avg episode reward: [(0, '30.491')] +[2024-07-05 10:33:15,228][19513] Updated weights for policy 0, policy_version 1554 (0.0013) +[2024-07-05 10:33:19,993][17621] Fps is (10 sec: 8191.9, 60 sec: 8601.6, 300 sec: 8415.4). Total num frames: 6402048. Throughput: 0: 2161.9. Samples: 346514. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:19,994][17621] Avg episode reward: [(0, '28.422')] +[2024-07-05 10:33:19,995][19513] Updated weights for policy 0, policy_version 1564 (0.0014) +[2024-07-05 10:33:24,760][19513] Updated weights for policy 0, policy_version 1574 (0.0014) +[2024-07-05 10:33:24,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8669.9, 300 sec: 8432.9). Total num frames: 6447104. Throughput: 0: 2153.1. Samples: 359202. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:24,994][17621] Avg episode reward: [(0, '29.413')] +[2024-07-05 10:33:29,580][19513] Updated weights for policy 0, policy_version 1584 (0.0014) +[2024-07-05 10:33:29,993][17621] Fps is (10 sec: 8601.7, 60 sec: 8601.7, 300 sec: 8426.1). Total num frames: 6488064. Throughput: 0: 2158.1. Samples: 365796. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:29,994][17621] Avg episode reward: [(0, '32.376')] +[2024-07-05 10:33:34,410][19513] Updated weights for policy 0, policy_version 1594 (0.0014) +[2024-07-05 10:33:34,995][17621] Fps is (10 sec: 8600.3, 60 sec: 8601.4, 300 sec: 8442.2). Total num frames: 6533120. Throughput: 0: 2145.7. Samples: 378398. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:35,008][17621] Avg episode reward: [(0, '31.715')] +[2024-07-05 10:33:39,199][19513] Updated weights for policy 0, policy_version 1604 (0.0014) +[2024-07-05 10:33:39,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8601.6, 300 sec: 8435.5). Total num frames: 6574080. Throughput: 0: 2146.4. Samples: 391236. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:39,994][17621] Avg episode reward: [(0, '31.934')] +[2024-07-05 10:33:43,972][19513] Updated weights for policy 0, policy_version 1614 (0.0014) +[2024-07-05 10:33:44,993][17621] Fps is (10 sec: 8603.0, 60 sec: 8602.5, 300 sec: 8450.7). Total num frames: 6619136. Throughput: 0: 2141.7. Samples: 397684. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:44,994][17621] Avg episode reward: [(0, '29.500')] +[2024-07-05 10:33:48,764][19513] Updated weights for policy 0, policy_version 1624 (0.0014) +[2024-07-05 10:33:49,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8601.6, 300 sec: 8444.1). Total num frames: 6660096. Throughput: 0: 2142.5. Samples: 410406. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:49,994][17621] Avg episode reward: [(0, '31.157')] +[2024-07-05 10:33:53,594][19513] Updated weights for policy 0, policy_version 1634 (0.0014) +[2024-07-05 10:33:54,993][17621] Fps is (10 sec: 8192.0, 60 sec: 8533.3, 300 sec: 8437.8). Total num frames: 6701056. Throughput: 0: 2138.0. Samples: 423302. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:54,994][17621] Avg episode reward: [(0, '30.499')] +[2024-07-05 10:33:58,371][19513] Updated weights for policy 0, policy_version 1644 (0.0014) +[2024-07-05 10:33:59,993][17621] Fps is (10 sec: 8601.7, 60 sec: 8601.6, 300 sec: 8451.7). Total num frames: 6746112. Throughput: 0: 2137.7. Samples: 429652. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:33:59,995][17621] Avg episode reward: [(0, '29.661')] +[2024-07-05 10:34:03,156][19513] Updated weights for policy 0, policy_version 1654 (0.0014) +[2024-07-05 10:34:04,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8533.3, 300 sec: 8445.6). Total num frames: 6787072. Throughput: 0: 2136.5. Samples: 442656. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:34:04,994][17621] Avg episode reward: [(0, '27.723')] +[2024-07-05 10:34:07,927][19513] Updated weights for policy 0, policy_version 1664 (0.0014) +[2024-07-05 10:34:09,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8458.7). Total num frames: 6832128. Throughput: 0: 2134.1. Samples: 455238. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:34:09,994][17621] Avg episode reward: [(0, '26.168')] +[2024-07-05 10:34:12,829][19513] Updated weights for policy 0, policy_version 1674 (0.0016) +[2024-07-05 10:34:14,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8452.7). Total num frames: 6873088. Throughput: 0: 2127.0. Samples: 461512. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:34:14,994][17621] Avg episode reward: [(0, '25.991')] +[2024-07-05 10:34:17,664][19513] Updated weights for policy 0, policy_version 1684 (0.0015) +[2024-07-05 10:34:19,993][17621] Fps is (10 sec: 8192.0, 60 sec: 8533.3, 300 sec: 8446.9). Total num frames: 6914048. Throughput: 0: 2133.5. Samples: 474404. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:34:19,994][17621] Avg episode reward: [(0, '26.754')] +[2024-07-05 10:34:22,498][19513] Updated weights for policy 0, policy_version 1694 (0.0014) +[2024-07-05 10:34:24,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8459.1). Total num frames: 6959104. Throughput: 0: 2128.0. Samples: 486998. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:34:24,994][17621] Avg episode reward: [(0, '26.880')] +[2024-07-05 10:34:24,998][19499] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001699_6959104.pth... +[2024-07-05 10:34:25,106][19499] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001224_5013504.pth +[2024-07-05 10:34:27,279][19513] Updated weights for policy 0, policy_version 1704 (0.0013) +[2024-07-05 10:34:29,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8533.3, 300 sec: 8453.4). Total num frames: 7000064. Throughput: 0: 2127.3. Samples: 493412. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 10:34:29,995][17621] Avg episode reward: [(0, '27.843')] +[2024-07-05 10:34:32,068][19513] Updated weights for policy 0, policy_version 1714 (0.0014) +[2024-07-05 10:34:34,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.6, 300 sec: 8465.1). Total num frames: 7045120. Throughput: 0: 2130.5. Samples: 506280. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:34:34,994][17621] Avg episode reward: [(0, '29.866')] +[2024-07-05 10:34:36,911][19513] Updated weights for policy 0, policy_version 1724 (0.0015) +[2024-07-05 10:34:39,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8459.5). Total num frames: 7086080. Throughput: 0: 2125.0. Samples: 518928. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:34:39,995][17621] Avg episode reward: [(0, '30.013')] +[2024-07-05 10:34:41,714][19513] Updated weights for policy 0, policy_version 1734 (0.0015) +[2024-07-05 10:34:44,993][17621] Fps is (10 sec: 8191.9, 60 sec: 8465.1, 300 sec: 8454.1). Total num frames: 7127040. Throughput: 0: 2128.1. Samples: 525418. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:34:44,994][17621] Avg episode reward: [(0, '29.615')] +[2024-07-05 10:34:46,493][19513] Updated weights for policy 0, policy_version 1744 (0.0014) +[2024-07-05 10:34:49,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8533.3, 300 sec: 8465.1). Total num frames: 7172096. Throughput: 0: 2124.4. Samples: 538256. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:34:49,995][17621] Avg episode reward: [(0, '28.615')] +[2024-07-05 10:34:51,284][19513] Updated weights for policy 0, policy_version 1754 (0.0014) +[2024-07-05 10:34:54,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8533.3, 300 sec: 8459.8). Total num frames: 7213056. Throughput: 0: 2135.1. Samples: 551318. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:34:54,994][17621] Avg episode reward: [(0, '26.984')] +[2024-07-05 10:34:55,984][19513] Updated weights for policy 0, policy_version 1764 (0.0013) +[2024-07-05 10:34:59,993][17621] Fps is (10 sec: 8601.8, 60 sec: 8533.3, 300 sec: 8470.2). Total num frames: 7258112. Throughput: 0: 2138.5. Samples: 557744. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:34:59,994][17621] Avg episode reward: [(0, '27.164')] +[2024-07-05 10:35:00,687][19513] Updated weights for policy 0, policy_version 1774 (0.0013) +[2024-07-05 10:35:04,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8601.6, 300 sec: 8480.2). Total num frames: 7303168. Throughput: 0: 2143.7. Samples: 570872. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:35:04,994][17621] Avg episode reward: [(0, '29.269')] +[2024-07-05 10:35:05,397][19513] Updated weights for policy 0, policy_version 1784 (0.0013) +[2024-07-05 10:35:09,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8533.3, 300 sec: 8475.0). Total num frames: 7344128. Throughput: 0: 2157.2. Samples: 584070. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:35:09,994][17621] Avg episode reward: [(0, '28.341')] +[2024-07-05 10:35:10,078][19513] Updated weights for policy 0, policy_version 1794 (0.0013) +[2024-07-05 10:35:14,767][19513] Updated weights for policy 0, policy_version 1804 (0.0013) +[2024-07-05 10:35:14,993][17621] Fps is (10 sec: 8601.7, 60 sec: 8601.6, 300 sec: 8484.6). Total num frames: 7389184. Throughput: 0: 2157.9. Samples: 590518. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:35:14,994][17621] Avg episode reward: [(0, '27.120')] +[2024-07-05 10:35:19,464][19513] Updated weights for policy 0, policy_version 1814 (0.0013) +[2024-07-05 10:35:19,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8669.9, 300 sec: 8493.8). Total num frames: 7434240. Throughput: 0: 2162.9. Samples: 603612. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:35:19,994][17621] Avg episode reward: [(0, '28.006')] +[2024-07-05 10:35:24,156][19513] Updated weights for policy 0, policy_version 1824 (0.0013) +[2024-07-05 10:35:24,993][17621] Fps is (10 sec: 8601.4, 60 sec: 8601.6, 300 sec: 8488.6). Total num frames: 7475200. Throughput: 0: 2174.5. Samples: 616782. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:35:24,995][17621] Avg episode reward: [(0, '29.148')] +[2024-07-05 10:35:28,834][19513] Updated weights for policy 0, policy_version 1834 (0.0013) +[2024-07-05 10:35:29,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8497.5). Total num frames: 7520256. Throughput: 0: 2174.8. Samples: 623286. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:35:29,994][17621] Avg episode reward: [(0, '28.570')] +[2024-07-05 10:35:33,524][19513] Updated weights for policy 0, policy_version 1844 (0.0013) +[2024-07-05 10:35:34,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8669.9, 300 sec: 8636.3). Total num frames: 7565312. Throughput: 0: 2180.0. Samples: 636354. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:35:34,994][17621] Avg episode reward: [(0, '29.473')] +[2024-07-05 10:35:38,228][19513] Updated weights for policy 0, policy_version 1854 (0.0014) +[2024-07-05 10:35:39,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8622.4). Total num frames: 7606272. Throughput: 0: 2182.6. Samples: 649534. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:35:39,995][17621] Avg episode reward: [(0, '30.169')] +[2024-07-05 10:35:42,886][19513] Updated weights for policy 0, policy_version 1864 (0.0013) +[2024-07-05 10:35:44,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 7651328. Throughput: 0: 2183.5. Samples: 656000. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:35:44,994][17621] Avg episode reward: [(0, '29.035')] +[2024-07-05 10:35:47,559][19513] Updated weights for policy 0, policy_version 1874 (0.0013) +[2024-07-05 10:35:50,001][17621] Fps is (10 sec: 9005.6, 60 sec: 8737.3, 300 sec: 8622.3). Total num frames: 7696384. Throughput: 0: 2184.6. Samples: 669194. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:35:50,004][17621] Avg episode reward: [(0, '29.115')] +[2024-07-05 10:35:52,205][19513] Updated weights for policy 0, policy_version 1884 (0.0013) +[2024-07-05 10:35:54,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 7737344. Throughput: 0: 2186.0. Samples: 682438. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:35:54,994][17621] Avg episode reward: [(0, '32.234')] +[2024-07-05 10:35:56,883][19513] Updated weights for policy 0, policy_version 1894 (0.0013) +[2024-07-05 10:35:59,993][17621] Fps is (10 sec: 8606.8, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 7782400. Throughput: 0: 2186.0. Samples: 688890. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:35:59,995][17621] Avg episode reward: [(0, '33.914')] +[2024-07-05 10:36:00,163][19499] Saving new best policy, reward=33.914! +[2024-07-05 10:36:01,551][19513] Updated weights for policy 0, policy_version 1904 (0.0013) +[2024-07-05 10:36:04,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 7827456. Throughput: 0: 2189.2. Samples: 702128. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:04,994][17621] Avg episode reward: [(0, '34.746')] +[2024-07-05 10:36:05,246][19499] Saving new best policy, reward=34.746! +[2024-07-05 10:36:06,180][19513] Updated weights for policy 0, policy_version 1914 (0.0013) +[2024-07-05 10:36:09,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8806.4, 300 sec: 8636.3). Total num frames: 7872512. Throughput: 0: 2187.1. Samples: 715202. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:09,994][17621] Avg episode reward: [(0, '30.502')] +[2024-07-05 10:36:10,963][19513] Updated weights for policy 0, policy_version 1924 (0.0014) +[2024-07-05 10:36:14,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 7913472. Throughput: 0: 2184.5. Samples: 721588. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:36:14,994][17621] Avg episode reward: [(0, '28.684')] +[2024-07-05 10:36:15,732][19513] Updated weights for policy 0, policy_version 1934 (0.0014) +[2024-07-05 10:36:19,993][17621] Fps is (10 sec: 8192.1, 60 sec: 8669.9, 300 sec: 8608.5). Total num frames: 7954432. Throughput: 0: 2183.2. Samples: 734598. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:19,994][17621] Avg episode reward: [(0, '30.659')] +[2024-07-05 10:36:20,477][19513] Updated weights for policy 0, policy_version 1944 (0.0013) +[2024-07-05 10:36:24,993][17621] Fps is (10 sec: 8601.7, 60 sec: 8738.2, 300 sec: 8622.4). Total num frames: 7999488. Throughput: 0: 2176.8. Samples: 747488. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:24,994][17621] Avg episode reward: [(0, '31.186')] +[2024-07-05 10:36:25,181][19499] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001954_8003584.pth... +[2024-07-05 10:36:25,182][19513] Updated weights for policy 0, policy_version 1954 (0.0013) +[2024-07-05 10:36:25,284][19499] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001448_5931008.pth +[2024-07-05 10:36:29,957][19513] Updated weights for policy 0, policy_version 1964 (0.0014) +[2024-07-05 10:36:29,993][17621] Fps is (10 sec: 9011.2, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 8044544. Throughput: 0: 2178.8. Samples: 754044. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:29,994][17621] Avg episode reward: [(0, '32.391')] +[2024-07-05 10:36:34,661][19513] Updated weights for policy 0, policy_version 1974 (0.0014) +[2024-07-05 10:36:34,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8622.4). Total num frames: 8085504. Throughput: 0: 2172.8. Samples: 766958. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:34,994][17621] Avg episode reward: [(0, '31.105')] +[2024-07-05 10:36:39,345][19513] Updated weights for policy 0, policy_version 1984 (0.0014) +[2024-07-05 10:36:39,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8738.1, 300 sec: 8622.4). Total num frames: 8130560. Throughput: 0: 2167.5. Samples: 779974. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:39,994][17621] Avg episode reward: [(0, '30.532')] +[2024-07-05 10:36:44,137][19513] Updated weights for policy 0, policy_version 1994 (0.0014) +[2024-07-05 10:36:44,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8622.4). Total num frames: 8171520. Throughput: 0: 2165.6. Samples: 786344. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:44,994][17621] Avg episode reward: [(0, '29.444')] +[2024-07-05 10:36:48,971][19513] Updated weights for policy 0, policy_version 2004 (0.0014) +[2024-07-05 10:36:49,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8670.7, 300 sec: 8636.3). Total num frames: 8216576. Throughput: 0: 2158.1. Samples: 799242. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:49,994][17621] Avg episode reward: [(0, '30.250')] +[2024-07-05 10:36:53,647][19513] Updated weights for policy 0, policy_version 2014 (0.0013) +[2024-07-05 10:36:54,993][17621] Fps is (10 sec: 8601.5, 60 sec: 8669.9, 300 sec: 8636.3). Total num frames: 8257536. Throughput: 0: 2158.6. Samples: 812340. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:54,994][17621] Avg episode reward: [(0, '31.023')] +[2024-07-05 10:36:58,327][19513] Updated weights for policy 0, policy_version 2024 (0.0013) +[2024-07-05 10:36:59,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8636.3). Total num frames: 8302592. Throughput: 0: 2160.8. Samples: 818822. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:36:59,994][17621] Avg episode reward: [(0, '31.928')] +[2024-07-05 10:37:02,975][19513] Updated weights for policy 0, policy_version 2034 (0.0013) +[2024-07-05 10:37:04,993][17621] Fps is (10 sec: 9011.2, 60 sec: 8669.9, 300 sec: 8650.2). Total num frames: 8347648. Throughput: 0: 2165.2. Samples: 832034. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:37:04,994][17621] Avg episode reward: [(0, '32.171')] +[2024-07-05 10:37:07,648][19513] Updated weights for policy 0, policy_version 2044 (0.0013) +[2024-07-05 10:37:09,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8601.6, 300 sec: 8636.3). Total num frames: 8388608. Throughput: 0: 2171.8. Samples: 845220. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:37:09,994][17621] Avg episode reward: [(0, '31.608')] +[2024-07-05 10:37:12,349][19513] Updated weights for policy 0, policy_version 2054 (0.0013) +[2024-07-05 10:37:14,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8636.3). Total num frames: 8433664. Throughput: 0: 2169.5. Samples: 851672. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:37:14,994][17621] Avg episode reward: [(0, '31.721')] +[2024-07-05 10:37:17,024][19513] Updated weights for policy 0, policy_version 2064 (0.0013) +[2024-07-05 10:37:19,993][17621] Fps is (10 sec: 9011.3, 60 sec: 8738.1, 300 sec: 8650.2). Total num frames: 8478720. Throughput: 0: 2175.7. Samples: 864866. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:37:19,994][17621] Avg episode reward: [(0, '30.887')] +[2024-07-05 10:37:21,672][19513] Updated weights for policy 0, policy_version 2074 (0.0013) +[2024-07-05 10:37:24,993][17621] Fps is (10 sec: 9011.2, 60 sec: 8738.1, 300 sec: 8650.2). Total num frames: 8523776. Throughput: 0: 2179.2. Samples: 878040. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:37:24,994][17621] Avg episode reward: [(0, '29.373')] +[2024-07-05 10:37:26,364][19513] Updated weights for policy 0, policy_version 2084 (0.0013) +[2024-07-05 10:37:29,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8669.9, 300 sec: 8636.3). Total num frames: 8564736. Throughput: 0: 2180.5. Samples: 884468. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:37:29,994][17621] Avg episode reward: [(0, '28.091')] +[2024-07-05 10:37:31,062][19513] Updated weights for policy 0, policy_version 2094 (0.0013) +[2024-07-05 10:37:34,993][17621] Fps is (10 sec: 8601.6, 60 sec: 8738.1, 300 sec: 8650.2). Total num frames: 8609792. Throughput: 0: 2188.0. Samples: 897700. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 10:37:34,994][17621] Avg episode reward: [(0, '28.210')] +[2024-07-05 10:37:35,402][19513] Updated weights for policy 0, policy_version 2104 (0.0013) +[2024-07-05 10:37:38,916][19513] Updated weights for policy 0, policy_version 2114 (0.0011) +[2024-07-05 10:37:39,993][17621] Fps is (10 sec: 10649.6, 60 sec: 9011.2, 300 sec: 8705.9). Total num frames: 8671232. Throughput: 0: 2273.9. Samples: 914664. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:37:39,994][17621] Avg episode reward: [(0, '29.955')] +[2024-07-05 10:37:42,437][19513] Updated weights for policy 0, policy_version 2124 (0.0012) +[2024-07-05 10:37:44,993][17621] Fps is (10 sec: 11878.4, 60 sec: 9284.3, 300 sec: 8761.3). Total num frames: 8728576. Throughput: 0: 2318.6. Samples: 923160. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:37:44,994][17621] Avg episode reward: [(0, '32.427')] +[2024-07-05 10:37:45,930][19513] Updated weights for policy 0, policy_version 2134 (0.0011) +[2024-07-05 10:37:49,470][19513] Updated weights for policy 0, policy_version 2144 (0.0012) +[2024-07-05 10:37:49,993][17621] Fps is (10 sec: 11468.8, 60 sec: 9489.1, 300 sec: 8802.9). Total num frames: 8785920. Throughput: 0: 2412.4. Samples: 940590. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:37:49,994][17621] Avg episode reward: [(0, '31.619')] +[2024-07-05 10:37:52,980][19513] Updated weights for policy 0, policy_version 2154 (0.0012) +[2024-07-05 10:37:54,993][17621] Fps is (10 sec: 11468.8, 60 sec: 9762.2, 300 sec: 8858.5). Total num frames: 8843264. Throughput: 0: 2506.8. Samples: 958026. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:37:54,994][17621] Avg episode reward: [(0, '31.156')] +[2024-07-05 10:37:56,509][19513] Updated weights for policy 0, policy_version 2164 (0.0012) +[2024-07-05 10:37:59,993][17621] Fps is (10 sec: 11468.8, 60 sec: 9967.0, 300 sec: 8900.1). Total num frames: 8900608. Throughput: 0: 2562.2. Samples: 966972. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:37:59,994][17621] Avg episode reward: [(0, '30.361')] +[2024-07-05 10:38:00,055][19513] Updated weights for policy 0, policy_version 2174 (0.0012) +[2024-07-05 10:38:03,623][19513] Updated weights for policy 0, policy_version 2184 (0.0012) +[2024-07-05 10:38:04,993][17621] Fps is (10 sec: 11468.6, 60 sec: 10171.7, 300 sec: 8941.8). Total num frames: 8957952. Throughput: 0: 2652.3. Samples: 984220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:04,994][17621] Avg episode reward: [(0, '30.532')] +[2024-07-05 10:38:07,205][19513] Updated weights for policy 0, policy_version 2194 (0.0012) +[2024-07-05 10:38:09,993][17621] Fps is (10 sec: 11468.7, 60 sec: 10444.8, 300 sec: 8997.3). Total num frames: 9015296. Throughput: 0: 2741.5. Samples: 1001408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:09,994][17621] Avg episode reward: [(0, '32.016')] +[2024-07-05 10:38:10,731][19513] Updated weights for policy 0, policy_version 2204 (0.0012) +[2024-07-05 10:38:14,241][19513] Updated weights for policy 0, policy_version 2214 (0.0011) +[2024-07-05 10:38:14,993][17621] Fps is (10 sec: 11878.6, 60 sec: 10717.9, 300 sec: 9066.7). Total num frames: 9076736. Throughput: 0: 2792.2. Samples: 1010116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:14,994][17621] Avg episode reward: [(0, '32.414')] +[2024-07-05 10:38:17,747][19513] Updated weights for policy 0, policy_version 2224 (0.0012) +[2024-07-05 10:38:19,993][17621] Fps is (10 sec: 11878.3, 60 sec: 10922.6, 300 sec: 9108.4). Total num frames: 9134080. Throughput: 0: 2886.1. Samples: 1027576. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:19,995][17621] Avg episode reward: [(0, '32.555')] +[2024-07-05 10:38:21,268][19513] Updated weights for policy 0, policy_version 2234 (0.0012) +[2024-07-05 10:38:24,784][19513] Updated weights for policy 0, policy_version 2244 (0.0012) +[2024-07-05 10:38:24,993][17621] Fps is (10 sec: 11468.9, 60 sec: 11127.5, 300 sec: 9163.9). Total num frames: 9191424. Throughput: 0: 2897.4. Samples: 1045048. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:24,994][17621] Avg episode reward: [(0, '31.650')] +[2024-07-05 10:38:25,138][19499] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002245_9195520.pth... +[2024-07-05 10:38:25,213][19499] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001699_6959104.pth +[2024-07-05 10:38:28,339][19513] Updated weights for policy 0, policy_version 2254 (0.0012) +[2024-07-05 10:38:29,993][17621] Fps is (10 sec: 11468.8, 60 sec: 11400.5, 300 sec: 9205.6). Total num frames: 9248768. Throughput: 0: 2905.4. Samples: 1053904. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:29,995][17621] Avg episode reward: [(0, '30.924')] +[2024-07-05 10:38:31,876][19513] Updated weights for policy 0, policy_version 2264 (0.0012) +[2024-07-05 10:38:34,993][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 9261.1). Total num frames: 9306112. Throughput: 0: 2897.9. Samples: 1070994. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:34,994][17621] Avg episode reward: [(0, '29.626')] +[2024-07-05 10:38:35,538][19513] Updated weights for policy 0, policy_version 2274 (0.0012) +[2024-07-05 10:38:39,212][19513] Updated weights for policy 0, policy_version 2284 (0.0013) +[2024-07-05 10:38:39,993][17621] Fps is (10 sec: 11468.9, 60 sec: 11537.1, 300 sec: 9302.8). Total num frames: 9363456. Throughput: 0: 2882.4. Samples: 1087732. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:39,994][17621] Avg episode reward: [(0, '30.748')] +[2024-07-05 10:38:42,884][19513] Updated weights for policy 0, policy_version 2294 (0.0013) +[2024-07-05 10:38:44,993][17621] Fps is (10 sec: 11059.2, 60 sec: 11468.8, 300 sec: 9344.4). Total num frames: 9416704. Throughput: 0: 2868.5. Samples: 1096054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:44,994][17621] Avg episode reward: [(0, '30.291')] +[2024-07-05 10:38:46,573][19513] Updated weights for policy 0, policy_version 2304 (0.0012) +[2024-07-05 10:38:49,993][17621] Fps is (10 sec: 11059.3, 60 sec: 11468.8, 300 sec: 9400.0). Total num frames: 9474048. Throughput: 0: 2853.2. Samples: 1112612. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:49,994][17621] Avg episode reward: [(0, '34.336')] +[2024-07-05 10:38:50,237][19513] Updated weights for policy 0, policy_version 2314 (0.0012) +[2024-07-05 10:38:53,885][19513] Updated weights for policy 0, policy_version 2324 (0.0013) +[2024-07-05 10:38:54,993][17621] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 9441.6). Total num frames: 9531392. Throughput: 0: 2850.1. Samples: 1129664. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:54,994][17621] Avg episode reward: [(0, '31.355')] +[2024-07-05 10:38:57,566][19513] Updated weights for policy 0, policy_version 2334 (0.0012) +[2024-07-05 10:38:59,993][17621] Fps is (10 sec: 11059.1, 60 sec: 11400.5, 300 sec: 9483.3). Total num frames: 9584640. Throughput: 0: 2840.4. Samples: 1137934. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:38:59,994][17621] Avg episode reward: [(0, '33.801')] +[2024-07-05 10:39:01,230][19513] Updated weights for policy 0, policy_version 2344 (0.0013) +[2024-07-05 10:39:04,901][19513] Updated weights for policy 0, policy_version 2354 (0.0012) +[2024-07-05 10:39:04,993][17621] Fps is (10 sec: 11059.1, 60 sec: 11400.5, 300 sec: 9524.9). Total num frames: 9641984. Throughput: 0: 2821.2. Samples: 1154532. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:04,994][17621] Avg episode reward: [(0, '31.694')] +[2024-07-05 10:39:08,588][19513] Updated weights for policy 0, policy_version 2364 (0.0013) +[2024-07-05 10:39:09,993][17621] Fps is (10 sec: 11059.3, 60 sec: 11332.3, 300 sec: 9566.6). Total num frames: 9695232. Throughput: 0: 2805.2. Samples: 1171280. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:09,994][17621] Avg episode reward: [(0, '34.175')] +[2024-07-05 10:39:12,262][19513] Updated weights for policy 0, policy_version 2374 (0.0012) +[2024-07-05 10:39:14,993][17621] Fps is (10 sec: 11059.3, 60 sec: 11264.0, 300 sec: 9622.1). Total num frames: 9752576. Throughput: 0: 2797.8. Samples: 1179804. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:14,994][17621] Avg episode reward: [(0, '34.321')] +[2024-07-05 10:39:15,931][19513] Updated weights for policy 0, policy_version 2384 (0.0012) +[2024-07-05 10:39:19,581][19513] Updated weights for policy 0, policy_version 2394 (0.0013) +[2024-07-05 10:39:19,993][17621] Fps is (10 sec: 11468.9, 60 sec: 11264.0, 300 sec: 9663.8). Total num frames: 9809920. Throughput: 0: 2787.5. Samples: 1196430. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:19,994][17621] Avg episode reward: [(0, '31.875')] +[2024-07-05 10:39:23,240][19513] Updated weights for policy 0, policy_version 2404 (0.0013) +[2024-07-05 10:39:24,993][17621] Fps is (10 sec: 11059.2, 60 sec: 11195.7, 300 sec: 9705.4). Total num frames: 9863168. Throughput: 0: 2787.5. Samples: 1213168. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:24,994][17621] Avg episode reward: [(0, '31.454')] +[2024-07-05 10:39:26,939][19513] Updated weights for policy 0, policy_version 2414 (0.0012) +[2024-07-05 10:39:29,993][17621] Fps is (10 sec: 11059.0, 60 sec: 11195.7, 300 sec: 9747.1). Total num frames: 9920512. Throughput: 0: 2790.3. Samples: 1221618. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:29,994][17621] Avg episode reward: [(0, '32.980')] +[2024-07-05 10:39:30,609][19513] Updated weights for policy 0, policy_version 2424 (0.0012) +[2024-07-05 10:39:34,284][19513] Updated weights for policy 0, policy_version 2434 (0.0012) +[2024-07-05 10:39:34,993][17621] Fps is (10 sec: 11059.3, 60 sec: 11127.5, 300 sec: 9788.8). Total num frames: 9973760. Throughput: 0: 2794.3. Samples: 1238354. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:39:34,994][17621] Avg episode reward: [(0, '32.667')] +[2024-07-05 10:39:37,586][19499] Stopping Batcher_0... +[2024-07-05 10:39:37,586][19499] Loop batcher_evt_loop terminating... +[2024-07-05 10:39:37,586][17621] Component Batcher_0 stopped! +[2024-07-05 10:39:37,587][19499] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-07-05 10:39:37,595][19518] Stopping RolloutWorker_w5... +[2024-07-05 10:39:37,595][19520] Stopping RolloutWorker_w7... +[2024-07-05 10:39:37,596][19516] Stopping RolloutWorker_w2... +[2024-07-05 10:39:37,596][19514] Stopping RolloutWorker_w1... +[2024-07-05 10:39:37,596][19517] Stopping RolloutWorker_w4... +[2024-07-05 10:39:37,596][19518] Loop rollout_proc5_evt_loop terminating... +[2024-07-05 10:39:37,596][19520] Loop rollout_proc7_evt_loop terminating... +[2024-07-05 10:39:37,596][19514] Loop rollout_proc1_evt_loop terminating... +[2024-07-05 10:39:37,596][19516] Loop rollout_proc2_evt_loop terminating... +[2024-07-05 10:39:37,596][19517] Loop rollout_proc4_evt_loop terminating... +[2024-07-05 10:39:37,596][19515] Stopping RolloutWorker_w3... +[2024-07-05 10:39:37,596][19512] Stopping RolloutWorker_w0... +[2024-07-05 10:39:37,596][19519] Stopping RolloutWorker_w6... +[2024-07-05 10:39:37,596][17621] Component RolloutWorker_w5 stopped! +[2024-07-05 10:39:37,597][19512] Loop rollout_proc0_evt_loop terminating... +[2024-07-05 10:39:37,597][19515] Loop rollout_proc3_evt_loop terminating... +[2024-07-05 10:39:37,597][19519] Loop rollout_proc6_evt_loop terminating... +[2024-07-05 10:39:37,597][17621] Component RolloutWorker_w7 stopped! +[2024-07-05 10:39:37,598][17621] Component RolloutWorker_w2 stopped! +[2024-07-05 10:39:37,599][17621] Component RolloutWorker_w1 stopped! +[2024-07-05 10:39:37,600][17621] Component RolloutWorker_w4 stopped! +[2024-07-05 10:39:37,601][17621] Component RolloutWorker_w3 stopped! +[2024-07-05 10:39:37,605][17621] Component RolloutWorker_w0 stopped! +[2024-07-05 10:39:37,606][17621] Component RolloutWorker_w6 stopped! +[2024-07-05 10:39:37,619][19513] Weights refcount: 2 0 +[2024-07-05 10:39:37,622][19513] Stopping InferenceWorker_p0-w0... +[2024-07-05 10:39:37,622][19513] Loop inference_proc0-0_evt_loop terminating... +[2024-07-05 10:39:37,622][17621] Component InferenceWorker_p0-w0 stopped! +[2024-07-05 10:39:37,689][19499] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000001954_8003584.pth +[2024-07-05 10:39:37,703][19499] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-07-05 10:39:37,832][19499] Stopping LearnerWorker_p0... +[2024-07-05 10:39:37,832][19499] Loop learner_proc0_evt_loop terminating... +[2024-07-05 10:39:37,832][17621] Component LearnerWorker_p0 stopped! +[2024-07-05 10:39:37,833][17621] Waiting for process learner_proc0 to stop... +[2024-07-05 10:39:38,874][17621] Waiting for process inference_proc0-0 to join... +[2024-07-05 10:39:38,875][17621] Waiting for process rollout_proc0 to join... +[2024-07-05 10:39:38,876][17621] Waiting for process rollout_proc1 to join... +[2024-07-05 10:39:38,877][17621] Waiting for process rollout_proc2 to join... +[2024-07-05 10:39:38,877][17621] Waiting for process rollout_proc3 to join... +[2024-07-05 10:39:38,878][17621] Waiting for process rollout_proc4 to join... +[2024-07-05 10:39:38,878][17621] Waiting for process rollout_proc5 to join... +[2024-07-05 10:39:38,879][17621] Waiting for process rollout_proc6 to join... +[2024-07-05 10:39:38,879][17621] Waiting for process rollout_proc7 to join... +[2024-07-05 10:39:38,880][17621] Batcher 0 profile tree view: +batching: 7.8226, releasing_batches: 0.0301 +[2024-07-05 10:39:38,880][17621] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0000 - wait_policy_total: 48.0327 -update_model: 15.5557 - weight_update: 0.0007 -one_step: 0.0025 - handle_policy_step: 942.8693 - deserialize: 74.3069, stack: 5.2415, obs_to_device_normalize: 225.0848, forward: 437.0021, send_messages: 47.3034 - prepare_outputs: 121.3818 - to_cpu: 73.2670 -[2024-07-05 14:11:16,784][03359] Learner 0 profile tree view: -misc: 0.0285, prepare_batch: 99.2626 -train: 216.4590 - epoch_init: 0.0202, minibatch_init: 0.0275, losses_postprocess: 1.3346, kl_divergence: 1.4462, after_optimizer: 0.9918 - calculate_losses: 76.2906 - losses_init: 0.0106, forward_head: 3.1115, bptt_initial: 59.8475, tail: 2.7148, advantages_returns: 0.7606, losses: 4.3112 - bptt: 4.6761 - bptt_forward_core: 4.4631 - update: 134.3772 - clip: 4.0994 -[2024-07-05 14:11:16,784][03359] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.4502, enqueue_policy_requests: 31.0993, env_step: 490.4718, overhead: 50.7846, complete_rollouts: 1.1539 -save_policy_outputs: 36.8057 - split_output_tensors: 17.1842 -[2024-07-05 14:11:16,785][03359] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.4672, enqueue_policy_requests: 31.9116, env_step: 508.5081, overhead: 52.3026, complete_rollouts: 1.2318 -save_policy_outputs: 37.4049 - split_output_tensors: 17.3587 -[2024-07-05 14:11:16,785][03359] Loop Runner_EvtLoop terminating... -[2024-07-05 14:11:16,786][03359] Runner profile tree view: -main_loop: 1048.4380 -[2024-07-05 14:11:16,786][03359] Collected {0: 400015360}, FPS: 47693.8 -[2024-07-05 14:20:25,296][03359] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 14:20:25,298][03359] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 14:20:25,299][03359] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 14:20:25,299][03359] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 14:20:25,300][03359] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 14:20:25,301][03359] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 14:20:25,301][03359] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 14:20:25,301][03359] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 14:20:25,302][03359] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 14:20:25,302][03359] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 14:20:25,303][03359] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 14:20:25,303][03359] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 14:20:25,303][03359] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 14:20:25,304][03359] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 14:20:25,304][03359] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 14:20:25,328][03359] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:20:25,331][03359] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 14:20:25,333][03359] RunningMeanStd input shape: (1,) -[2024-07-05 14:20:25,346][03359] ConvEncoder: input_channels=3 -[2024-07-05 14:20:25,429][03359] Conv encoder output size: 512 -[2024-07-05 14:20:25,430][03359] Policy head output size: 512 -[2024-07-05 14:20:25,626][03359] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051272_400015360.pth... -[2024-07-05 14:20:26,806][03359] Num frames 100... -[2024-07-05 14:20:26,919][03359] Num frames 200... -[2024-07-05 14:20:27,030][03359] Num frames 300... -[2024-07-05 14:20:27,139][03359] Num frames 400... -[2024-07-05 14:20:27,248][03359] Num frames 500... -[2024-07-05 14:20:27,357][03359] Num frames 600... -[2024-07-05 14:20:27,469][03359] Num frames 700... -[2024-07-05 14:20:27,580][03359] Num frames 800... -[2024-07-05 14:20:27,694][03359] Num frames 900... -[2024-07-05 14:20:27,828][03359] Num frames 1000... -[2024-07-05 14:20:27,948][03359] Num frames 1100... -[2024-07-05 14:20:28,033][03359] Num frames 1200... -[2024-07-05 14:20:28,117][03359] Num frames 1300... -[2024-07-05 14:20:28,203][03359] Num frames 1400... -[2024-07-05 14:20:28,289][03359] Num frames 1500... -[2024-07-05 14:20:28,374][03359] Num frames 1600... -[2024-07-05 14:20:28,456][03359] Num frames 1700... -[2024-07-05 14:20:28,537][03359] Num frames 1800... -[2024-07-05 14:20:28,619][03359] Num frames 1900... -[2024-07-05 14:20:28,704][03359] Num frames 2000... -[2024-07-05 14:20:28,779][03359] Num frames 2100... -[2024-07-05 14:20:28,831][03359] Avg episode rewards: #0: 60.999, true rewards: #0: 21.000 -[2024-07-05 14:20:28,832][03359] Avg episode reward: 60.999, avg true_objective: 21.000 -[2024-07-05 14:20:28,907][03359] Num frames 2200... -[2024-07-05 14:20:28,985][03359] Num frames 2300... -[2024-07-05 14:20:29,062][03359] Num frames 2400... -[2024-07-05 14:20:29,139][03359] Num frames 2500... -[2024-07-05 14:20:29,216][03359] Num frames 2600... -[2024-07-05 14:20:29,292][03359] Num frames 2700... -[2024-07-05 14:20:29,369][03359] Num frames 2800... -[2024-07-05 14:20:29,445][03359] Num frames 2900... -[2024-07-05 14:20:29,518][03359] Num frames 3000... -[2024-07-05 14:20:29,590][03359] Num frames 3100... -[2024-07-05 14:20:29,665][03359] Num frames 3200... -[2024-07-05 14:20:29,741][03359] Num frames 3300... -[2024-07-05 14:20:29,819][03359] Num frames 3400... -[2024-07-05 14:20:29,913][03359] Num frames 3500... -[2024-07-05 14:20:29,992][03359] Num frames 3600... -[2024-07-05 14:20:30,069][03359] Num frames 3700... -[2024-07-05 14:20:30,147][03359] Num frames 3800... -[2024-07-05 14:20:30,223][03359] Num frames 3900... -[2024-07-05 14:20:30,300][03359] Num frames 4000... -[2024-07-05 14:20:30,378][03359] Num frames 4100... -[2024-07-05 14:20:30,457][03359] Num frames 4200... -[2024-07-05 14:20:30,509][03359] Avg episode rewards: #0: 61.499, true rewards: #0: 21.000 -[2024-07-05 14:20:30,511][03359] Avg episode reward: 61.499, avg true_objective: 21.000 -[2024-07-05 14:20:30,583][03359] Num frames 4300... -[2024-07-05 14:20:30,658][03359] Num frames 4400... -[2024-07-05 14:20:30,730][03359] Num frames 4500... -[2024-07-05 14:20:30,806][03359] Num frames 4600... -[2024-07-05 14:20:30,884][03359] Num frames 4700... -[2024-07-05 14:20:30,961][03359] Num frames 4800... -[2024-07-05 14:20:31,038][03359] Num frames 4900... -[2024-07-05 14:20:31,122][03359] Avg episode rewards: #0: 46.786, true rewards: #0: 16.453 -[2024-07-05 14:20:31,123][03359] Avg episode reward: 46.786, avg true_objective: 16.453 -[2024-07-05 14:20:31,174][03359] Num frames 5000... -[2024-07-05 14:20:31,251][03359] Num frames 5100... -[2024-07-05 14:20:31,327][03359] Num frames 5200... -[2024-07-05 14:20:31,402][03359] Num frames 5300... -[2024-07-05 14:20:31,478][03359] Num frames 5400... -[2024-07-05 14:20:31,555][03359] Num frames 5500... -[2024-07-05 14:20:31,630][03359] Num frames 5600... -[2024-07-05 14:20:31,704][03359] Num frames 5700... -[2024-07-05 14:20:31,777][03359] Num frames 5800... -[2024-07-05 14:20:31,852][03359] Num frames 5900... -[2024-07-05 14:20:31,931][03359] Num frames 6000... -[2024-07-05 14:20:32,011][03359] Num frames 6100... -[2024-07-05 14:20:32,088][03359] Num frames 6200... -[2024-07-05 14:20:32,165][03359] Num frames 6300... -[2024-07-05 14:20:32,242][03359] Num frames 6400... -[2024-07-05 14:20:32,319][03359] Num frames 6500... -[2024-07-05 14:20:32,395][03359] Num frames 6600... -[2024-07-05 14:20:32,472][03359] Num frames 6700... -[2024-07-05 14:20:32,550][03359] Num frames 6800... -[2024-07-05 14:20:32,628][03359] Num frames 6900... -[2024-07-05 14:20:32,706][03359] Num frames 7000... -[2024-07-05 14:20:32,785][03359] Avg episode rewards: #0: 50.839, true rewards: #0: 17.590 -[2024-07-05 14:20:32,786][03359] Avg episode reward: 50.839, avg true_objective: 17.590 -[2024-07-05 14:20:32,836][03359] Num frames 7100... -[2024-07-05 14:20:32,913][03359] Num frames 7200... -[2024-07-05 14:20:32,990][03359] Num frames 7300... -[2024-07-05 14:20:33,070][03359] Num frames 7400... -[2024-07-05 14:20:33,148][03359] Num frames 7500... -[2024-07-05 14:20:33,225][03359] Num frames 7600... -[2024-07-05 14:20:33,302][03359] Num frames 7700... -[2024-07-05 14:20:33,393][03359] Num frames 7800... -[2024-07-05 14:20:33,470][03359] Num frames 7900... -[2024-07-05 14:20:33,546][03359] Num frames 8000... -[2024-07-05 14:20:33,625][03359] Num frames 8100... -[2024-07-05 14:20:33,705][03359] Num frames 8200... -[2024-07-05 14:20:33,785][03359] Num frames 8300... -[2024-07-05 14:20:33,863][03359] Num frames 8400... -[2024-07-05 14:20:33,942][03359] Num frames 8500... -[2024-07-05 14:20:34,019][03359] Num frames 8600... -[2024-07-05 14:20:34,097][03359] Num frames 8700... -[2024-07-05 14:20:34,172][03359] Num frames 8800... -[2024-07-05 14:20:34,248][03359] Num frames 8900... -[2024-07-05 14:20:34,324][03359] Num frames 9000... -[2024-07-05 14:20:34,399][03359] Num frames 9100... -[2024-07-05 14:20:34,480][03359] Avg episode rewards: #0: 52.671, true rewards: #0: 18.272 -[2024-07-05 14:20:34,481][03359] Avg episode reward: 52.671, avg true_objective: 18.272 -[2024-07-05 14:20:34,532][03359] Num frames 9200... -[2024-07-05 14:20:34,608][03359] Num frames 9300... -[2024-07-05 14:20:34,686][03359] Num frames 9400... -[2024-07-05 14:20:34,763][03359] Num frames 9500... -[2024-07-05 14:20:34,839][03359] Num frames 9600... -[2024-07-05 14:20:34,917][03359] Num frames 9700... -[2024-07-05 14:20:34,995][03359] Num frames 9800... -[2024-07-05 14:20:35,072][03359] Num frames 9900... -[2024-07-05 14:20:35,151][03359] Num frames 10000... -[2024-07-05 14:20:35,230][03359] Num frames 10100... -[2024-07-05 14:20:35,307][03359] Num frames 10200... -[2024-07-05 14:20:35,388][03359] Num frames 10300... -[2024-07-05 14:20:35,467][03359] Num frames 10400... -[2024-07-05 14:20:35,544][03359] Num frames 10500... -[2024-07-05 14:20:35,621][03359] Num frames 10600... -[2024-07-05 14:20:35,699][03359] Num frames 10700... -[2024-07-05 14:20:35,778][03359] Num frames 10800... -[2024-07-05 14:20:35,858][03359] Num frames 10900... -[2024-07-05 14:20:35,936][03359] Num frames 11000... -[2024-07-05 14:20:36,015][03359] Num frames 11100... -[2024-07-05 14:20:36,095][03359] Num frames 11200... -[2024-07-05 14:20:36,178][03359] Avg episode rewards: #0: 53.559, true rewards: #0: 18.727 -[2024-07-05 14:20:36,179][03359] Avg episode reward: 53.559, avg true_objective: 18.727 -[2024-07-05 14:20:36,228][03359] Num frames 11300... -[2024-07-05 14:20:36,311][03359] Num frames 11400... -[2024-07-05 14:20:36,389][03359] Num frames 11500... -[2024-07-05 14:20:36,467][03359] Num frames 11600... -[2024-07-05 14:20:36,546][03359] Num frames 11700... -[2024-07-05 14:20:36,623][03359] Num frames 11800... -[2024-07-05 14:20:36,702][03359] Num frames 11900... -[2024-07-05 14:20:36,794][03359] Num frames 12000... -[2024-07-05 14:20:36,869][03359] Num frames 12100... -[2024-07-05 14:20:36,945][03359] Num frames 12200... -[2024-07-05 14:20:37,023][03359] Num frames 12300... -[2024-07-05 14:20:37,103][03359] Num frames 12400... -[2024-07-05 14:20:37,179][03359] Num frames 12500... -[2024-07-05 14:20:37,255][03359] Num frames 12600... -[2024-07-05 14:20:37,334][03359] Num frames 12700... -[2024-07-05 14:20:37,412][03359] Num frames 12800... -[2024-07-05 14:20:37,491][03359] Num frames 12900... -[2024-07-05 14:20:37,570][03359] Num frames 13000... -[2024-07-05 14:20:37,647][03359] Num frames 13100... -[2024-07-05 14:20:37,723][03359] Num frames 13200... -[2024-07-05 14:20:37,802][03359] Num frames 13300... -[2024-07-05 14:20:37,884][03359] Avg episode rewards: #0: 54.479, true rewards: #0: 19.051 -[2024-07-05 14:20:37,885][03359] Avg episode reward: 54.479, avg true_objective: 19.051 -[2024-07-05 14:20:37,935][03359] Num frames 13400... -[2024-07-05 14:20:38,011][03359] Num frames 13500... -[2024-07-05 14:20:38,087][03359] Num frames 13600... -[2024-07-05 14:20:38,164][03359] Num frames 13700... -[2024-07-05 14:20:38,239][03359] Num frames 13800... -[2024-07-05 14:20:38,316][03359] Num frames 13900... -[2024-07-05 14:20:38,394][03359] Num frames 14000... -[2024-07-05 14:20:38,471][03359] Num frames 14100... -[2024-07-05 14:20:38,547][03359] Num frames 14200... -[2024-07-05 14:20:38,627][03359] Num frames 14300... -[2024-07-05 14:20:38,704][03359] Num frames 14400... -[2024-07-05 14:20:38,775][03359] Avg episode rewards: #0: 50.904, true rewards: #0: 18.030 -[2024-07-05 14:20:38,777][03359] Avg episode reward: 50.904, avg true_objective: 18.030 -[2024-07-05 14:20:38,836][03359] Num frames 14500... -[2024-07-05 14:20:38,912][03359] Num frames 14600... -[2024-07-05 14:20:38,988][03359] Num frames 14700... -[2024-07-05 14:20:39,061][03359] Num frames 14800... -[2024-07-05 14:20:39,137][03359] Num frames 14900... -[2024-07-05 14:20:39,211][03359] Num frames 15000... -[2024-07-05 14:20:39,289][03359] Num frames 15100... -[2024-07-05 14:20:39,364][03359] Num frames 15200... -[2024-07-05 14:20:39,439][03359] Num frames 15300... -[2024-07-05 14:20:39,513][03359] Num frames 15400... -[2024-07-05 14:20:39,590][03359] Num frames 15500... -[2024-07-05 14:20:39,667][03359] Num frames 15600... -[2024-07-05 14:20:39,742][03359] Num frames 15700... -[2024-07-05 14:20:39,819][03359] Num frames 15800... -[2024-07-05 14:20:39,894][03359] Num frames 15900... -[2024-07-05 14:20:39,970][03359] Avg episode rewards: #0: 49.252, true rewards: #0: 17.698 -[2024-07-05 14:20:39,971][03359] Avg episode reward: 49.252, avg true_objective: 17.698 -[2024-07-05 14:20:40,024][03359] Num frames 16000... -[2024-07-05 14:20:40,099][03359] Num frames 16100... -[2024-07-05 14:20:40,174][03359] Num frames 16200... -[2024-07-05 14:20:40,252][03359] Num frames 16300... -[2024-07-05 14:20:40,342][03359] Num frames 16400... -[2024-07-05 14:20:40,419][03359] Num frames 16500... -[2024-07-05 14:20:40,493][03359] Num frames 16600... -[2024-07-05 14:20:40,569][03359] Num frames 16700... -[2024-07-05 14:20:40,646][03359] Num frames 16800... -[2024-07-05 14:20:40,720][03359] Num frames 16900... -[2024-07-05 14:20:40,796][03359] Num frames 17000... -[2024-07-05 14:20:40,871][03359] Num frames 17100... -[2024-07-05 14:20:40,948][03359] Num frames 17200... -[2024-07-05 14:20:41,025][03359] Num frames 17300... -[2024-07-05 14:20:41,101][03359] Num frames 17400... -[2024-07-05 14:20:41,180][03359] Num frames 17500... -[2024-07-05 14:20:41,256][03359] Num frames 17600... -[2024-07-05 14:20:41,333][03359] Num frames 17700... -[2024-07-05 14:20:41,411][03359] Num frames 17800... -[2024-07-05 14:20:41,489][03359] Num frames 17900... -[2024-07-05 14:20:41,565][03359] Num frames 18000... -[2024-07-05 14:20:41,641][03359] Avg episode rewards: #0: 50.427, true rewards: #0: 18.028 -[2024-07-05 14:20:41,643][03359] Avg episode reward: 50.427, avg true_objective: 18.028 -[2024-07-05 14:21:03,566][03359] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 14:21:48,882][03359] Environment doom_basic already registered, overwriting... -[2024-07-05 14:21:48,885][03359] Environment doom_two_colors_easy already registered, overwriting... -[2024-07-05 14:21:48,885][03359] Environment doom_two_colors_hard already registered, overwriting... -[2024-07-05 14:21:48,886][03359] Environment doom_dm already registered, overwriting... -[2024-07-05 14:21:48,886][03359] Environment doom_dwango5 already registered, overwriting... -[2024-07-05 14:21:48,886][03359] Environment doom_my_way_home_flat_actions already registered, overwriting... -[2024-07-05 14:21:48,887][03359] Environment doom_defend_the_center_flat_actions already registered, overwriting... -[2024-07-05 14:21:48,887][03359] Environment doom_my_way_home already registered, overwriting... -[2024-07-05 14:21:48,888][03359] Environment doom_deadly_corridor already registered, overwriting... -[2024-07-05 14:21:48,888][03359] Environment doom_defend_the_center already registered, overwriting... -[2024-07-05 14:21:48,889][03359] Environment doom_defend_the_line already registered, overwriting... -[2024-07-05 14:21:48,889][03359] Environment doom_health_gathering already registered, overwriting... -[2024-07-05 14:21:48,889][03359] Environment doom_health_gathering_supreme already registered, overwriting... -[2024-07-05 14:21:48,890][03359] Environment doom_battle already registered, overwriting... -[2024-07-05 14:21:48,890][03359] Environment doom_battle2 already registered, overwriting... -[2024-07-05 14:21:48,890][03359] Environment doom_duel_bots already registered, overwriting... -[2024-07-05 14:21:48,891][03359] Environment doom_deathmatch_bots already registered, overwriting... -[2024-07-05 14:21:48,891][03359] Environment doom_duel already registered, overwriting... -[2024-07-05 14:21:48,892][03359] Environment doom_deathmatch_full already registered, overwriting... -[2024-07-05 14:21:48,892][03359] Environment doom_benchmark already registered, overwriting... -[2024-07-05 14:21:48,893][03359] register_encoder_factory: -[2024-07-05 14:21:48,901][03359] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 14:21:48,902][03359] Overriding arg 'train_for_env_steps' with value 450000000 passed from command line -[2024-07-05 14:21:48,908][03359] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment already exists! -[2024-07-05 14:21:48,909][03359] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment... -[2024-07-05 14:21:48,909][03359] Weights and Biases integration disabled -[2024-07-05 14:21:48,912][03359] Environment var CUDA_VISIBLE_DEVICES is 0 + wait_policy_total: 3.3309 +update_model: 3.8678 + weight_update: 0.0012 +one_step: 0.0029 + handle_policy_step: 523.9220 + deserialize: 7.3432, stack: 1.1157, obs_to_device_normalize: 87.8864, forward: 310.3393, send_messages: 10.3367 + prepare_outputs: 98.4612 + to_cpu: 88.9070 +[2024-07-05 10:39:38,881][17621] Learner 0 profile tree view: +misc: 0.0053, prepare_batch: 18.7216 +train: 415.7077 + epoch_init: 0.0536, minibatch_init: 0.0575, losses_postprocess: 0.4229, kl_divergence: 0.2211, after_optimizer: 1.9888 + calculate_losses: 139.0886 + losses_init: 0.0246, forward_head: 12.1004, bptt_initial: 123.7953, tail: 0.5311, advantages_returns: 0.1292, losses: 1.3283 + bptt: 0.8529 + bptt_forward_core: 0.8145 + update: 272.4793 + clip: 1.0972 +[2024-07-05 10:39:38,881][17621] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.1033, enqueue_policy_requests: 7.1716, env_step: 106.9268, overhead: 9.2646, complete_rollouts: 0.1950 +save_policy_outputs: 9.6318 + split_output_tensors: 4.4998 +[2024-07-05 10:39:38,881][17621] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.1014, enqueue_policy_requests: 7.3546, env_step: 106.6730, overhead: 9.3844, complete_rollouts: 0.1968 +save_policy_outputs: 9.7707 + split_output_tensors: 4.5739 +[2024-07-05 10:39:38,882][17621] Loop Runner_EvtLoop terminating... +[2024-07-05 10:39:38,882][17621] Runner profile tree view: +main_loop: 550.5557 +[2024-07-05 10:39:38,883][17621] Collected {0: 10006528}, FPS: 9069.1 +[2024-07-05 10:41:06,918][17621] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 10:41:06,919][17621] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 10:41:06,920][17621] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 10:41:06,920][17621] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 10:41:06,921][17621] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 10:41:06,921][17621] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 10:41:06,921][17621] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 10:41:06,921][17621] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 10:41:06,922][17621] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-07-05 10:41:06,922][17621] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-07-05 10:41:06,922][17621] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 10:41:06,923][17621] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 10:41:06,923][17621] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 10:41:06,923][17621] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 10:41:06,924][17621] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 10:41:06,941][17621] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:41:06,942][17621] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:41:06,943][17621] RunningMeanStd input shape: (1,) +[2024-07-05 10:41:06,951][17621] Num input channels: 3 +[2024-07-05 10:41:06,960][17621] Convolutional layer output size: 4608 +[2024-07-05 10:41:06,973][17621] Policy head output size: 512 +[2024-07-05 10:41:08,681][17621] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-07-05 10:41:09,496][17621] Num frames 100... +[2024-07-05 10:41:09,572][17621] Num frames 200... +[2024-07-05 10:41:09,655][17621] Num frames 300... +[2024-07-05 10:41:09,736][17621] Num frames 400... +[2024-07-05 10:41:09,814][17621] Num frames 500... +[2024-07-05 10:41:09,896][17621] Num frames 600... +[2024-07-05 10:41:10,005][17621] Avg episode rewards: #0: 14.720, true rewards: #0: 6.720 +[2024-07-05 10:41:10,006][17621] Avg episode reward: 14.720, avg true_objective: 6.720 +[2024-07-05 10:41:10,034][17621] Num frames 700... +[2024-07-05 10:41:10,108][17621] Num frames 800... +[2024-07-05 10:41:10,184][17621] Num frames 900... +[2024-07-05 10:41:10,267][17621] Num frames 1000... +[2024-07-05 10:41:10,342][17621] Num frames 1100... +[2024-07-05 10:41:10,415][17621] Num frames 1200... +[2024-07-05 10:41:10,491][17621] Num frames 1300... +[2024-07-05 10:41:10,565][17621] Num frames 1400... +[2024-07-05 10:41:10,637][17621] Num frames 1500... +[2024-07-05 10:41:10,712][17621] Num frames 1600... +[2024-07-05 10:41:10,787][17621] Num frames 1700... +[2024-07-05 10:41:10,869][17621] Num frames 1800... +[2024-07-05 10:41:10,940][17621] Avg episode rewards: #0: 21.120, true rewards: #0: 9.120 +[2024-07-05 10:41:10,941][17621] Avg episode reward: 21.120, avg true_objective: 9.120 +[2024-07-05 10:41:11,003][17621] Num frames 1900... +[2024-07-05 10:41:11,074][17621] Num frames 2000... +[2024-07-05 10:41:11,147][17621] Num frames 2100... +[2024-07-05 10:41:11,220][17621] Num frames 2200... +[2024-07-05 10:41:11,297][17621] Num frames 2300... +[2024-07-05 10:41:11,372][17621] Num frames 2400... +[2024-07-05 10:41:11,445][17621] Num frames 2500... +[2024-07-05 10:41:11,522][17621] Num frames 2600... +[2024-07-05 10:41:11,601][17621] Num frames 2700... +[2024-07-05 10:41:11,675][17621] Num frames 2800... +[2024-07-05 10:41:11,753][17621] Num frames 2900... +[2024-07-05 10:41:11,826][17621] Num frames 3000... +[2024-07-05 10:41:11,908][17621] Num frames 3100... +[2024-07-05 10:41:11,983][17621] Num frames 3200... +[2024-07-05 10:41:12,058][17621] Num frames 3300... +[2024-07-05 10:41:12,137][17621] Num frames 3400... +[2024-07-05 10:41:12,215][17621] Num frames 3500... +[2024-07-05 10:41:12,291][17621] Num frames 3600... +[2024-07-05 10:41:12,371][17621] Num frames 3700... +[2024-07-05 10:41:12,446][17621] Num frames 3800... +[2024-07-05 10:41:12,523][17621] Num frames 3900... +[2024-07-05 10:41:12,595][17621] Avg episode rewards: #0: 32.746, true rewards: #0: 13.080 +[2024-07-05 10:41:12,596][17621] Avg episode reward: 32.746, avg true_objective: 13.080 +[2024-07-05 10:41:12,658][17621] Num frames 4000... +[2024-07-05 10:41:12,737][17621] Num frames 4100... +[2024-07-05 10:41:12,814][17621] Num frames 4200... +[2024-07-05 10:41:12,904][17621] Num frames 4300... +[2024-07-05 10:41:12,985][17621] Num frames 4400... +[2024-07-05 10:41:13,070][17621] Num frames 4500... +[2024-07-05 10:41:13,149][17621] Avg episode rewards: #0: 27.580, true rewards: #0: 11.330 +[2024-07-05 10:41:13,150][17621] Avg episode reward: 27.580, avg true_objective: 11.330 +[2024-07-05 10:41:13,205][17621] Num frames 4600... +[2024-07-05 10:41:13,283][17621] Num frames 4700... +[2024-07-05 10:41:13,361][17621] Num frames 4800... +[2024-07-05 10:41:13,437][17621] Num frames 4900... +[2024-07-05 10:41:13,511][17621] Num frames 5000... +[2024-07-05 10:41:13,587][17621] Num frames 5100... +[2024-07-05 10:41:13,659][17621] Num frames 5200... +[2024-07-05 10:41:13,730][17621] Num frames 5300... +[2024-07-05 10:41:13,804][17621] Num frames 5400... +[2024-07-05 10:41:13,879][17621] Avg episode rewards: #0: 27.456, true rewards: #0: 10.856 +[2024-07-05 10:41:13,880][17621] Avg episode reward: 27.456, avg true_objective: 10.856 +[2024-07-05 10:41:13,939][17621] Num frames 5500... +[2024-07-05 10:41:14,010][17621] Num frames 5600... +[2024-07-05 10:41:14,083][17621] Num frames 5700... +[2024-07-05 10:41:14,156][17621] Num frames 5800... +[2024-07-05 10:41:14,232][17621] Num frames 5900... +[2024-07-05 10:41:14,307][17621] Num frames 6000... +[2024-07-05 10:41:14,383][17621] Num frames 6100... +[2024-07-05 10:41:14,459][17621] Num frames 6200... +[2024-07-05 10:41:14,534][17621] Num frames 6300... +[2024-07-05 10:41:14,607][17621] Num frames 6400... +[2024-07-05 10:41:14,685][17621] Num frames 6500... +[2024-07-05 10:41:14,780][17621] Num frames 6600... +[2024-07-05 10:41:14,861][17621] Num frames 6700... +[2024-07-05 10:41:14,938][17621] Num frames 6800... +[2024-07-05 10:41:15,018][17621] Num frames 6900... +[2024-07-05 10:41:15,098][17621] Num frames 7000... +[2024-07-05 10:41:15,177][17621] Num frames 7100... +[2024-07-05 10:41:15,252][17621] Num frames 7200... +[2024-07-05 10:41:15,334][17621] Num frames 7300... +[2024-07-05 10:41:15,414][17621] Num frames 7400... +[2024-07-05 10:41:15,494][17621] Num frames 7500... +[2024-07-05 10:41:15,571][17621] Avg episode rewards: #0: 32.213, true rewards: #0: 12.547 +[2024-07-05 10:41:15,572][17621] Avg episode reward: 32.213, avg true_objective: 12.547 +[2024-07-05 10:41:15,631][17621] Num frames 7600... +[2024-07-05 10:41:15,708][17621] Num frames 7700... +[2024-07-05 10:41:15,787][17621] Num frames 7800... +[2024-07-05 10:41:15,864][17621] Num frames 7900... +[2024-07-05 10:41:15,945][17621] Num frames 8000... +[2024-07-05 10:41:16,030][17621] Num frames 8100... +[2024-07-05 10:41:16,108][17621] Num frames 8200... +[2024-07-05 10:41:16,187][17621] Num frames 8300... +[2024-07-05 10:41:16,271][17621] Num frames 8400... +[2024-07-05 10:41:16,362][17621] Num frames 8500... +[2024-07-05 10:41:16,443][17621] Num frames 8600... +[2024-07-05 10:41:16,519][17621] Num frames 8700... +[2024-07-05 10:41:16,598][17621] Num frames 8800... +[2024-07-05 10:41:16,672][17621] Num frames 8900... +[2024-07-05 10:41:16,749][17621] Num frames 9000... +[2024-07-05 10:41:16,827][17621] Num frames 9100... +[2024-07-05 10:41:16,905][17621] Num frames 9200... +[2024-07-05 10:41:16,998][17621] Avg episode rewards: #0: 33.213, true rewards: #0: 13.213 +[2024-07-05 10:41:16,999][17621] Avg episode reward: 33.213, avg true_objective: 13.213 +[2024-07-05 10:41:17,047][17621] Num frames 9300... +[2024-07-05 10:41:17,126][17621] Num frames 9400... +[2024-07-05 10:41:17,207][17621] Num frames 9500... +[2024-07-05 10:41:17,288][17621] Num frames 9600... +[2024-07-05 10:41:17,368][17621] Num frames 9700... +[2024-07-05 10:41:17,446][17621] Num frames 9800... +[2024-07-05 10:41:17,524][17621] Num frames 9900... +[2024-07-05 10:41:17,607][17621] Num frames 10000... +[2024-07-05 10:41:17,688][17621] Num frames 10100... +[2024-07-05 10:41:17,770][17621] Num frames 10200... +[2024-07-05 10:41:17,854][17621] Num frames 10300... +[2024-07-05 10:41:17,934][17621] Num frames 10400... +[2024-07-05 10:41:18,017][17621] Avg episode rewards: #0: 32.162, true rewards: #0: 13.038 +[2024-07-05 10:41:18,018][17621] Avg episode reward: 32.162, avg true_objective: 13.038 +[2024-07-05 10:41:18,081][17621] Num frames 10500... +[2024-07-05 10:41:18,165][17621] Num frames 10600... +[2024-07-05 10:41:18,244][17621] Num frames 10700... +[2024-07-05 10:41:18,326][17621] Num frames 10800... +[2024-07-05 10:41:18,411][17621] Num frames 10900... +[2024-07-05 10:41:18,496][17621] Num frames 11000... +[2024-07-05 10:41:18,583][17621] Num frames 11100... +[2024-07-05 10:41:18,662][17621] Num frames 11200... +[2024-07-05 10:41:18,745][17621] Num frames 11300... +[2024-07-05 10:41:18,827][17621] Num frames 11400... +[2024-07-05 10:41:18,900][17621] Avg episode rewards: #0: 30.802, true rewards: #0: 12.691 +[2024-07-05 10:41:18,902][17621] Avg episode reward: 30.802, avg true_objective: 12.691 +[2024-07-05 10:41:18,970][17621] Num frames 11500... +[2024-07-05 10:41:19,051][17621] Num frames 11600... +[2024-07-05 10:41:19,131][17621] Num frames 11700... +[2024-07-05 10:41:19,213][17621] Num frames 11800... +[2024-07-05 10:41:19,296][17621] Num frames 11900... +[2024-07-05 10:41:19,380][17621] Num frames 12000... +[2024-07-05 10:41:19,463][17621] Num frames 12100... +[2024-07-05 10:41:19,546][17621] Num frames 12200... +[2024-07-05 10:41:19,627][17621] Num frames 12300... +[2024-07-05 10:41:19,709][17621] Num frames 12400... +[2024-07-05 10:41:19,795][17621] Num frames 12500... +[2024-07-05 10:41:19,872][17621] Num frames 12600... +[2024-07-05 10:41:19,962][17621] Num frames 12700... +[2024-07-05 10:41:20,041][17621] Num frames 12800... +[2024-07-05 10:41:20,124][17621] Num frames 12900... +[2024-07-05 10:41:20,204][17621] Num frames 13000... +[2024-07-05 10:41:20,284][17621] Num frames 13100... +[2024-07-05 10:41:20,365][17621] Num frames 13200... +[2024-07-05 10:41:20,443][17621] Num frames 13300... +[2024-07-05 10:41:20,527][17621] Num frames 13400... +[2024-07-05 10:41:20,613][17621] Num frames 13500... +[2024-07-05 10:41:20,691][17621] Avg episode rewards: #0: 32.822, true rewards: #0: 13.522 +[2024-07-05 10:41:20,692][17621] Avg episode reward: 32.822, avg true_objective: 13.522 +[2024-07-05 10:41:35,312][17621] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/replay.mp4! +[2024-07-05 10:44:13,506][17621] Environment doom_basic already registered, overwriting... +[2024-07-05 10:44:13,508][17621] Environment doom_two_colors_easy already registered, overwriting... +[2024-07-05 10:44:13,509][17621] Environment doom_two_colors_hard already registered, overwriting... +[2024-07-05 10:44:13,509][17621] Environment doom_dm already registered, overwriting... +[2024-07-05 10:44:13,509][17621] Environment doom_dwango5 already registered, overwriting... +[2024-07-05 10:44:13,509][17621] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-07-05 10:44:13,510][17621] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-07-05 10:44:13,510][17621] Environment doom_my_way_home already registered, overwriting... +[2024-07-05 10:44:13,510][17621] Environment doom_deadly_corridor already registered, overwriting... +[2024-07-05 10:44:13,510][17621] Environment doom_defend_the_center already registered, overwriting... +[2024-07-05 10:44:13,511][17621] Environment doom_defend_the_line already registered, overwriting... +[2024-07-05 10:44:13,511][17621] Environment doom_health_gathering already registered, overwriting... +[2024-07-05 10:44:13,511][17621] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-07-05 10:44:13,512][17621] Environment doom_battle already registered, overwriting... +[2024-07-05 10:44:13,512][17621] Environment doom_battle2 already registered, overwriting... +[2024-07-05 10:44:13,512][17621] Environment doom_duel_bots already registered, overwriting... +[2024-07-05 10:44:13,512][17621] Environment doom_deathmatch_bots already registered, overwriting... +[2024-07-05 10:44:13,513][17621] Environment doom_duel already registered, overwriting... +[2024-07-05 10:44:13,513][17621] Environment doom_deathmatch_full already registered, overwriting... +[2024-07-05 10:44:13,513][17621] Environment doom_benchmark already registered, overwriting... +[2024-07-05 10:44:13,513][17621] register_encoder_factory: +[2024-07-05 10:44:13,519][17621] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 10:44:13,519][17621] Overriding arg 'train_for_env_steps' with value 20000000 passed from command line +[2024-07-05 10:44:13,523][17621] Experiment dir /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet already exists! +[2024-07-05 10:44:13,524][17621] Resuming existing experiment from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet... +[2024-07-05 10:44:13,524][17621] Weights and Biases integration disabled +[2024-07-05 10:44:13,525][17621] Environment var CUDA_VISIBLE_DEVICES is 0 -[2024-07-05 14:21:52,407][03359] Starting experiment with the following configuration: +[2024-07-05 10:44:15,828][17621] Starting experiment with the following configuration: help=False algo=APPO env=doom_health_gathering_supreme -experiment=default_experiment +experiment=conv_resnet train_dir=/home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir restart_behavior=resume device=gpu @@ -17248,9 +3513,9 @@ num_batches_to_accumulate=2 worker_num_splits=2 policy_workers_per_policy=1 max_policy_lag=1000 -num_workers=16 -num_envs_per_worker=8 -batch_size=2048 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 num_batches_per_epoch=1 num_epochs=1 rollout=32 @@ -17298,7 +3563,7 @@ stats_avg=100 summaries_use_frameskip=True heartbeat_interval=20 heartbeat_reporting_interval=600 -train_for_env_steps=450000000 +train_for_env_steps=20000000 train_for_seconds=10000000000 save_every_sec=120 keep_checkpoints=2 @@ -17309,7 +3574,7 @@ save_best_metric=reward save_best_after=100000 benchmark=False encoder_mlp_layers=[512, 512] -encoder_conv_architecture=convnet_simple +encoder_conv_architecture=resnet_impala encoder_conv_mlp_layers=[512] use_rnn=True rnn_size=512 @@ -17358,77 +3623,57 @@ res_h=72 wide_aspect_ratio=False eval_env_frameskip=1 fps=35 -command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=20000000 -cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 20000000} +command_line=--env=doom_health_gathering_supreme --experiment=conv_resnet --seed=200 --num_workers=8 --num_envs_per_worker=4 --batch_size=1024 --encoder_conv_architecture=resnet_impala --train_for_env_steps=5000000 +cli_args={'env': 'doom_health_gathering_supreme', 'experiment': 'conv_resnet', 'seed': 200, 'num_workers': 8, 'num_envs_per_worker': 4, 'batch_size': 1024, 'train_for_env_steps': 5000000, 'encoder_conv_architecture': 'resnet_impala'} git_hash=unknown git_repo_name=not a git repository -[2024-07-05 14:21:52,408][03359] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 14:21:52,409][03359] Rollout worker 0 uses device cpu -[2024-07-05 14:21:52,410][03359] Rollout worker 1 uses device cpu -[2024-07-05 14:21:52,410][03359] Rollout worker 2 uses device cpu -[2024-07-05 14:21:52,411][03359] Rollout worker 3 uses device cpu -[2024-07-05 14:21:52,411][03359] Rollout worker 4 uses device cpu -[2024-07-05 14:21:52,411][03359] Rollout worker 5 uses device cpu -[2024-07-05 14:21:52,412][03359] Rollout worker 6 uses device cpu -[2024-07-05 14:21:52,412][03359] Rollout worker 7 uses device cpu -[2024-07-05 14:21:52,412][03359] Rollout worker 8 uses device cpu -[2024-07-05 14:21:52,412][03359] Rollout worker 9 uses device cpu -[2024-07-05 14:21:52,413][03359] Rollout worker 10 uses device cpu -[2024-07-05 14:21:52,413][03359] Rollout worker 11 uses device cpu -[2024-07-05 14:21:52,413][03359] Rollout worker 12 uses device cpu -[2024-07-05 14:21:52,414][03359] Rollout worker 13 uses device cpu -[2024-07-05 14:21:52,414][03359] Rollout worker 14 uses device cpu -[2024-07-05 14:21:52,414][03359] Rollout worker 15 uses device cpu -[2024-07-05 14:21:52,513][03359] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:21:52,514][03359] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 14:21:52,582][03359] Starting all processes... -[2024-07-05 14:21:52,582][03359] Starting process learner_proc0 -[2024-07-05 14:21:52,632][03359] Starting all processes... -[2024-07-05 14:21:52,635][03359] Starting process inference_proc0-0 -[2024-07-05 14:21:52,636][03359] Starting process rollout_proc0 -[2024-07-05 14:21:52,636][03359] Starting process rollout_proc1 -[2024-07-05 14:21:52,636][03359] Starting process rollout_proc2 -[2024-07-05 14:21:52,636][03359] Starting process rollout_proc3 -[2024-07-05 14:21:52,637][03359] Starting process rollout_proc4 -[2024-07-05 14:21:52,637][03359] Starting process rollout_proc5 -[2024-07-05 14:21:52,637][03359] Starting process rollout_proc6 -[2024-07-05 14:21:52,637][03359] Starting process rollout_proc7 -[2024-07-05 14:21:52,637][03359] Starting process rollout_proc8 -[2024-07-05 14:21:52,638][03359] Starting process rollout_proc9 -[2024-07-05 14:21:52,638][03359] Starting process rollout_proc10 -[2024-07-05 14:21:52,638][03359] Starting process rollout_proc11 -[2024-07-05 14:21:52,638][03359] Starting process rollout_proc12 -[2024-07-05 14:21:52,638][03359] Starting process rollout_proc13 -[2024-07-05 14:21:52,639][03359] Starting process rollout_proc14 -[2024-07-05 14:21:52,676][03359] Starting process rollout_proc15 -[2024-07-05 14:21:56,835][07119] Worker 2 uses CPU cores [2] -[2024-07-05 14:21:56,852][07125] Worker 10 uses CPU cores [10] -[2024-07-05 14:21:56,900][07117] Worker 1 uses CPU cores [1] -[2024-07-05 14:21:57,104][07146] Worker 15 uses CPU cores [15] -[2024-07-05 14:21:57,128][07095] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:21:57,129][07095] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 14:21:57,189][07095] Num visible devices: 1 -[2024-07-05 14:21:57,189][07118] Worker 3 uses CPU cores [3] -[2024-07-05 14:21:57,192][07123] Worker 4 uses CPU cores [4] -[2024-07-05 14:21:57,229][07095] Setting fixed seed 200 -[2024-07-05 14:21:57,236][07145] Worker 14 uses CPU cores [14] -[2024-07-05 14:21:57,240][07095] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:21:57,240][07095] Initializing actor-critic model on device cuda:0 -[2024-07-05 14:21:57,240][07095] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 14:21:57,241][07095] RunningMeanStd input shape: (1,) -[2024-07-05 14:21:57,254][07095] ConvEncoder: input_channels=3 -[2024-07-05 14:21:57,260][07116] Worker 0 uses CPU cores [0] -[2024-07-05 14:21:57,280][07126] Worker 9 uses CPU cores [9] -[2024-07-05 14:21:57,291][07124] Worker 7 uses CPU cores [7] -[2024-07-05 14:21:57,335][07143] Worker 11 uses CPU cores [11] -[2024-07-05 14:21:57,340][07120] Worker 6 uses CPU cores [6] -[2024-07-05 14:21:57,354][07095] Conv encoder output size: 512 -[2024-07-05 14:21:57,355][07095] Policy head output size: 512 -[2024-07-05 14:21:57,360][07142] Worker 12 uses CPU cores [12] -[2024-07-05 14:21:57,364][07115] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:21:57,364][07115] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 14:21:57,365][07095] Created Actor Critic model with architecture: -[2024-07-05 14:21:57,365][07095] ActorCriticSharedWeights( +[2024-07-05 10:44:15,829][17621] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json... +[2024-07-05 10:44:15,830][17621] Rollout worker 0 uses device cpu +[2024-07-05 10:44:15,830][17621] Rollout worker 1 uses device cpu +[2024-07-05 10:44:15,831][17621] Rollout worker 2 uses device cpu +[2024-07-05 10:44:15,831][17621] Rollout worker 3 uses device cpu +[2024-07-05 10:44:15,832][17621] Rollout worker 4 uses device cpu +[2024-07-05 10:44:15,832][17621] Rollout worker 5 uses device cpu +[2024-07-05 10:44:15,832][17621] Rollout worker 6 uses device cpu +[2024-07-05 10:44:15,833][17621] Rollout worker 7 uses device cpu +[2024-07-05 10:44:15,907][17621] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:44:15,907][17621] InferenceWorker_p0-w0: min num requests: 2 +[2024-07-05 10:44:15,933][17621] Starting all processes... +[2024-07-05 10:44:15,934][17621] Starting process learner_proc0 +[2024-07-05 10:44:15,983][17621] Starting all processes... +[2024-07-05 10:44:15,986][17621] Starting process inference_proc0-0 +[2024-07-05 10:44:15,986][17621] Starting process rollout_proc0 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc1 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc2 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc3 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc4 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc5 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc6 +[2024-07-05 10:44:15,987][17621] Starting process rollout_proc7 +[2024-07-05 10:44:18,628][22241] Worker 2 uses CPU cores [4, 5] +[2024-07-05 10:44:18,664][22245] Worker 6 uses CPU cores [12, 13] +[2024-07-05 10:44:18,783][22239] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:44:18,783][22239] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-07-05 10:44:18,784][22240] Worker 1 uses CPU cores [2, 3] +[2024-07-05 10:44:18,793][22238] Worker 0 uses CPU cores [0, 1] +[2024-07-05 10:44:18,801][22244] Worker 5 uses CPU cores [10, 11] +[2024-07-05 10:44:18,829][22239] Num visible devices: 1 +[2024-07-05 10:44:18,949][22225] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:44:18,949][22225] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-07-05 10:44:18,992][22225] Num visible devices: 1 +[2024-07-05 10:44:19,009][22242] Worker 4 uses CPU cores [8, 9] +[2024-07-05 10:44:19,020][22225] Setting fixed seed 200 +[2024-07-05 10:44:19,021][22225] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:44:19,021][22225] Initializing actor-critic model on device cuda:0 +[2024-07-05 10:44:19,022][22225] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:44:19,023][22225] RunningMeanStd input shape: (1,) +[2024-07-05 10:44:19,032][22225] Num input channels: 3 +[2024-07-05 10:44:19,046][22225] Convolutional layer output size: 4608 +[2024-07-05 10:44:19,052][22246] Worker 7 uses CPU cores [14, 15] +[2024-07-05 10:44:19,057][22225] Policy head output size: 512 +[2024-07-05 10:44:19,144][22243] Worker 3 uses CPU cores [6, 7] +[2024-07-05 10:44:19,159][22225] Created Actor Critic model with architecture: +[2024-07-05 10:44:19,159][22225] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -17438,23 +3683,67 @@ git_repo_name=not a git repository ) (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (basic_encoder): ResnetEncoder( + (conv_head): Sequential( + (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (2): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (3): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (6): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (7): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) + (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (10): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (11): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (12): ELU(alpha=1.0) + ) + (mlp_layers): Sequential( + (0): Linear(in_features=4608, out_features=512, bias=True) + (1): ELU(alpha=1.0) ) ) ) @@ -17469,1792 +3758,970 @@ git_repo_name=not a git repository (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-07-05 14:21:57,368][07121] Worker 5 uses CPU cores [5] -[2024-07-05 14:21:57,401][07122] Worker 8 uses CPU cores [8] -[2024-07-05 14:21:57,416][07144] Worker 13 uses CPU cores [13] -[2024-07-05 14:21:57,426][07115] Num visible devices: 1 -[2024-07-05 14:21:57,478][07095] Using optimizer -[2024-07-05 14:21:58,078][07095] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051272_400015360.pth... -[2024-07-05 14:21:58,099][07095] Loading model from checkpoint -[2024-07-05 14:21:58,100][07095] Loaded experiment state at self.train_step=51272, self.env_steps=400015360 -[2024-07-05 14:21:58,100][07095] Initialized policy 0 weights for model version 51272 -[2024-07-05 14:21:58,101][07095] LearnerWorker_p0 finished initialization! -[2024-07-05 14:21:58,101][07095] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:21:58,170][07115] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 14:21:58,170][07115] RunningMeanStd input shape: (1,) -[2024-07-05 14:21:58,178][07115] ConvEncoder: input_channels=3 -[2024-07-05 14:21:58,232][07115] Conv encoder output size: 512 -[2024-07-05 14:21:58,232][07115] Policy head output size: 512 -[2024-07-05 14:21:58,266][03359] Inference worker 0-0 is ready! -[2024-07-05 14:21:58,267][03359] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 14:21:58,338][07121] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,342][07118] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,342][07146] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,343][07126] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,343][07119] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,343][07144] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,344][07120] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,344][07143] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,346][07125] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,349][07124] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,350][07117] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,354][07116] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,361][07145] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,363][07122] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,369][07142] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,370][07123] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:21:58,912][03359] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 400015360. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 14:21:58,963][07146] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,964][07121] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,965][07119] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,969][07117] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,969][07124] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,969][07126] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,971][07144] Decorrelating experience for 0 frames... -[2024-07-05 14:21:58,972][07142] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,143][07122] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,143][07117] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,145][07142] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,195][07145] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,210][07124] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,234][07120] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,343][07121] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,367][07122] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,399][07117] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,399][07116] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,410][07124] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,420][07143] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,423][07145] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,469][07125] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,551][07122] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,571][07121] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,616][07124] Decorrelating experience for 96 frames... -[2024-07-05 14:21:59,640][07116] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,640][07123] Decorrelating experience for 0 frames... -[2024-07-05 14:21:59,641][07146] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,668][07117] Decorrelating experience for 96 frames... -[2024-07-05 14:21:59,753][07122] Decorrelating experience for 96 frames... -[2024-07-05 14:21:59,801][07125] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,824][07123] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,848][07142] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,864][07143] Decorrelating experience for 32 frames... -[2024-07-05 14:21:59,889][07116] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,912][07124] Decorrelating experience for 128 frames... -[2024-07-05 14:21:59,928][07145] Decorrelating experience for 64 frames... -[2024-07-05 14:21:59,993][07146] Decorrelating experience for 64 frames... -[2024-07-05 14:22:00,050][07142] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,072][07118] Decorrelating experience for 0 frames... -[2024-07-05 14:22:00,086][07123] Decorrelating experience for 64 frames... -[2024-07-05 14:22:00,100][07121] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,207][07116] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,207][07117] Decorrelating experience for 128 frames... -[2024-07-05 14:22:00,253][07119] Decorrelating experience for 32 frames... -[2024-07-05 14:22:00,298][07122] Decorrelating experience for 128 frames... -[2024-07-05 14:22:00,303][07124] Decorrelating experience for 160 frames... -[2024-07-05 14:22:00,338][07123] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,423][07125] Decorrelating experience for 64 frames... -[2024-07-05 14:22:00,469][07146] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,491][07117] Decorrelating experience for 160 frames... -[2024-07-05 14:22:00,513][07119] Decorrelating experience for 64 frames... -[2024-07-05 14:22:00,528][07145] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,575][07118] Decorrelating experience for 32 frames... -[2024-07-05 14:22:00,636][07123] Decorrelating experience for 128 frames... -[2024-07-05 14:22:00,686][07125] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,696][07142] Decorrelating experience for 128 frames... -[2024-07-05 14:22:00,716][07124] Decorrelating experience for 192 frames... -[2024-07-05 14:22:00,794][07118] Decorrelating experience for 64 frames... -[2024-07-05 14:22:00,797][07143] Decorrelating experience for 64 frames... -[2024-07-05 14:22:00,816][07121] Decorrelating experience for 128 frames... -[2024-07-05 14:22:00,914][07146] Decorrelating experience for 128 frames... -[2024-07-05 14:22:00,963][07119] Decorrelating experience for 96 frames... -[2024-07-05 14:22:00,971][07145] Decorrelating experience for 128 frames... -[2024-07-05 14:22:01,044][07122] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,064][07120] Decorrelating experience for 32 frames... -[2024-07-05 14:22:01,065][07123] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,071][07124] Decorrelating experience for 224 frames... -[2024-07-05 14:22:01,156][07121] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,181][07118] Decorrelating experience for 96 frames... -[2024-07-05 14:22:01,202][07146] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,219][07125] Decorrelating experience for 128 frames... -[2024-07-05 14:22:01,285][07116] Decorrelating experience for 128 frames... -[2024-07-05 14:22:01,334][07143] Decorrelating experience for 96 frames... -[2024-07-05 14:22:01,374][07120] Decorrelating experience for 64 frames... -[2024-07-05 14:22:01,449][07142] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,475][07117] Decorrelating experience for 192 frames... -[2024-07-05 14:22:01,496][07126] Decorrelating experience for 32 frames... -[2024-07-05 14:22:01,590][07119] Decorrelating experience for 128 frames... -[2024-07-05 14:22:01,622][07116] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,658][07146] Decorrelating experience for 192 frames... -[2024-07-05 14:22:01,699][07143] Decorrelating experience for 128 frames... -[2024-07-05 14:22:01,746][07122] Decorrelating experience for 192 frames... -[2024-07-05 14:22:01,747][07144] Decorrelating experience for 32 frames... -[2024-07-05 14:22:01,780][07125] Decorrelating experience for 160 frames... -[2024-07-05 14:22:01,809][07126] Decorrelating experience for 64 frames... -[2024-07-05 14:22:01,881][07142] Decorrelating experience for 192 frames... -[2024-07-05 14:22:01,926][07116] Decorrelating experience for 192 frames... -[2024-07-05 14:22:01,941][07121] Decorrelating experience for 192 frames... -[2024-07-05 14:22:02,022][07146] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,025][07118] Decorrelating experience for 128 frames... -[2024-07-05 14:22:02,056][07123] Decorrelating experience for 192 frames... -[2024-07-05 14:22:02,079][07126] Decorrelating experience for 96 frames... -[2024-07-05 14:22:02,087][07125] Decorrelating experience for 192 frames... -[2024-07-05 14:22:02,221][07120] Decorrelating experience for 96 frames... -[2024-07-05 14:22:02,221][07142] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,252][07116] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,307][07143] Decorrelating experience for 160 frames... -[2024-07-05 14:22:02,315][07121] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,334][07145] Decorrelating experience for 160 frames... -[2024-07-05 14:22:02,349][07122] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,359][07118] Decorrelating experience for 160 frames... -[2024-07-05 14:22:02,525][07119] Decorrelating experience for 160 frames... -[2024-07-05 14:22:02,621][07120] Decorrelating experience for 128 frames... -[2024-07-05 14:22:02,640][07117] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,642][07126] Decorrelating experience for 128 frames... -[2024-07-05 14:22:02,668][07125] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,675][07123] Decorrelating experience for 224 frames... -[2024-07-05 14:22:02,705][07143] Decorrelating experience for 192 frames... -[2024-07-05 14:22:02,944][07144] Decorrelating experience for 64 frames... -[2024-07-05 14:22:02,963][07119] Decorrelating experience for 192 frames... -[2024-07-05 14:22:02,963][07120] Decorrelating experience for 160 frames... -[2024-07-05 14:22:02,988][07126] Decorrelating experience for 160 frames... -[2024-07-05 14:22:03,025][07145] Decorrelating experience for 192 frames... -[2024-07-05 14:22:03,035][07118] Decorrelating experience for 192 frames... -[2024-07-05 14:22:03,106][07143] Decorrelating experience for 224 frames... -[2024-07-05 14:22:03,314][07144] Decorrelating experience for 96 frames... -[2024-07-05 14:22:03,367][07095] Signal inference workers to stop experience collection... -[2024-07-05 14:22:03,392][07115] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 14:22:03,412][07126] Decorrelating experience for 192 frames... -[2024-07-05 14:22:03,412][07119] Decorrelating experience for 224 frames... -[2024-07-05 14:22:03,421][07120] Decorrelating experience for 192 frames... -[2024-07-05 14:22:03,556][07145] Decorrelating experience for 224 frames... -[2024-07-05 14:22:03,636][07144] Decorrelating experience for 128 frames... -[2024-07-05 14:22:03,656][07118] Decorrelating experience for 224 frames... -[2024-07-05 14:22:03,800][07126] Decorrelating experience for 224 frames... -[2024-07-05 14:22:03,912][03359] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 400015360. Throughput: 0: 4.8. Samples: 24. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 14:22:03,914][03359] Avg episode reward: [(0, '0.863')] -[2024-07-05 14:22:03,973][07120] Decorrelating experience for 224 frames... -[2024-07-05 14:22:04,151][07144] Decorrelating experience for 160 frames... -[2024-07-05 14:22:04,367][07144] Decorrelating experience for 192 frames... -[2024-07-05 14:22:04,584][07144] Decorrelating experience for 224 frames... -[2024-07-05 14:22:05,046][07095] Signal inference workers to resume experience collection... -[2024-07-05 14:22:05,046][07115] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 14:22:07,227][07115] Updated weights for policy 0, policy_version 51282 (0.0094) -[2024-07-05 14:22:08,912][03359] Fps is (10 sec: 13926.3, 60 sec: 13926.3, 300 sec: 13926.3). Total num frames: 400154624. Throughput: 0: 2284.4. Samples: 22844. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:22:08,913][03359] Avg episode reward: [(0, '7.933')] -[2024-07-05 14:22:09,449][07115] Updated weights for policy 0, policy_version 51292 (0.0011) -[2024-07-05 14:22:11,630][07115] Updated weights for policy 0, policy_version 51302 (0.0010) -[2024-07-05 14:22:12,506][03359] Heartbeat connected on Batcher_0 -[2024-07-05 14:22:12,509][03359] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 14:22:12,519][03359] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 14:22:12,521][03359] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 14:22:12,527][03359] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 14:22:12,530][03359] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 14:22:12,531][03359] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 14:22:12,533][03359] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 14:22:12,540][03359] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 14:22:12,541][03359] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 14:22:12,543][03359] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 14:22:12,567][03359] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 14:22:12,568][03359] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 14:22:12,572][03359] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 14:22:12,574][03359] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 14:22:12,577][03359] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 14:22:12,579][03359] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 14:22:12,584][03359] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 14:22:12,592][03359] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 14:22:13,794][07115] Updated weights for policy 0, policy_version 51312 (0.0010) -[2024-07-05 14:22:13,912][03359] Fps is (10 sec: 32768.1, 60 sec: 21845.4, 300 sec: 21845.4). Total num frames: 400343040. Throughput: 0: 5303.2. Samples: 79548. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:22:13,913][03359] Avg episode reward: [(0, '56.164')] -[2024-07-05 14:22:16,038][07115] Updated weights for policy 0, policy_version 51322 (0.0009) -[2024-07-05 14:22:18,239][07115] Updated weights for policy 0, policy_version 51332 (0.0011) -[2024-07-05 14:22:18,912][03359] Fps is (10 sec: 37683.6, 60 sec: 25804.8, 300 sec: 25804.8). Total num frames: 400531456. Throughput: 0: 5366.4. Samples: 107328. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:22:18,913][03359] Avg episode reward: [(0, '51.782')] -[2024-07-05 14:22:20,370][07115] Updated weights for policy 0, policy_version 51342 (0.0014) -[2024-07-05 14:22:22,489][07115] Updated weights for policy 0, policy_version 51352 (0.0010) -[2024-07-05 14:22:23,912][03359] Fps is (10 sec: 37683.4, 60 sec: 28180.6, 300 sec: 28180.6). Total num frames: 400719872. Throughput: 0: 6567.9. Samples: 164196. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:22:23,913][03359] Avg episode reward: [(0, '51.925')] -[2024-07-05 14:22:24,626][07115] Updated weights for policy 0, policy_version 51362 (0.0010) -[2024-07-05 14:22:26,868][07115] Updated weights for policy 0, policy_version 51372 (0.0013) -[2024-07-05 14:22:28,912][03359] Fps is (10 sec: 36863.9, 60 sec: 29491.2, 300 sec: 29491.2). Total num frames: 400900096. Throughput: 0: 7329.7. Samples: 219892. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:22:28,914][03359] Avg episode reward: [(0, '54.341')] -[2024-07-05 14:22:29,086][07115] Updated weights for policy 0, policy_version 51382 (0.0012) -[2024-07-05 14:22:31,272][07115] Updated weights for policy 0, policy_version 51392 (0.0018) -[2024-07-05 14:22:33,446][07115] Updated weights for policy 0, policy_version 51402 (0.0013) -[2024-07-05 14:22:33,912][03359] Fps is (10 sec: 37683.1, 60 sec: 30895.6, 300 sec: 30895.6). Total num frames: 401096704. Throughput: 0: 7079.0. Samples: 247764. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:22:33,913][03359] Avg episode reward: [(0, '55.270')] -[2024-07-05 14:22:35,661][07115] Updated weights for policy 0, policy_version 51412 (0.0009) -[2024-07-05 14:22:37,966][07115] Updated weights for policy 0, policy_version 51422 (0.0009) -[2024-07-05 14:22:38,912][03359] Fps is (10 sec: 37683.2, 60 sec: 31539.2, 300 sec: 31539.2). Total num frames: 401276928. Throughput: 0: 7592.7. Samples: 303708. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:22:38,913][03359] Avg episode reward: [(0, '52.881')] -[2024-07-05 14:22:40,191][07115] Updated weights for policy 0, policy_version 51432 (0.0010) -[2024-07-05 14:22:42,363][07115] Updated weights for policy 0, policy_version 51442 (0.0009) -[2024-07-05 14:22:43,912][03359] Fps is (10 sec: 36864.3, 60 sec: 32221.9, 300 sec: 32221.9). Total num frames: 401465344. Throughput: 0: 7987.5. Samples: 359436. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:22:43,913][03359] Avg episode reward: [(0, '53.459')] -[2024-07-05 14:22:44,527][07115] Updated weights for policy 0, policy_version 51452 (0.0010) -[2024-07-05 14:22:46,741][07115] Updated weights for policy 0, policy_version 51462 (0.0013) -[2024-07-05 14:22:48,912][03359] Fps is (10 sec: 36863.8, 60 sec: 32604.1, 300 sec: 32604.1). Total num frames: 401645568. Throughput: 0: 8612.5. Samples: 387588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:22:48,914][03359] Avg episode reward: [(0, '55.455')] -[2024-07-05 14:22:48,951][07115] Updated weights for policy 0, policy_version 51472 (0.0010) -[2024-07-05 14:22:51,211][07115] Updated weights for policy 0, policy_version 51482 (0.0015) -[2024-07-05 14:22:53,541][07115] Updated weights for policy 0, policy_version 51492 (0.0012) -[2024-07-05 14:22:53,912][03359] Fps is (10 sec: 36044.6, 60 sec: 32917.0, 300 sec: 32917.0). Total num frames: 401825792. Throughput: 0: 9328.8. Samples: 442640. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:22:53,913][03359] Avg episode reward: [(0, '52.743')] -[2024-07-05 14:22:55,957][07115] Updated weights for policy 0, policy_version 51502 (0.0010) -[2024-07-05 14:22:58,218][07115] Updated weights for policy 0, policy_version 51512 (0.0014) -[2024-07-05 14:22:58,912][03359] Fps is (10 sec: 35225.9, 60 sec: 33041.1, 300 sec: 33041.1). Total num frames: 401997824. Throughput: 0: 9222.9. Samples: 494580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:22:58,914][03359] Avg episode reward: [(0, '55.806')] -[2024-07-05 14:23:00,482][07115] Updated weights for policy 0, policy_version 51522 (0.0011) -[2024-07-05 14:23:02,816][07115] Updated weights for policy 0, policy_version 51532 (0.0010) -[2024-07-05 14:23:03,912][03359] Fps is (10 sec: 35225.4, 60 sec: 36044.8, 300 sec: 33272.1). Total num frames: 402178048. Throughput: 0: 9197.8. Samples: 521228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:03,914][03359] Avg episode reward: [(0, '55.635')] -[2024-07-05 14:23:05,147][07115] Updated weights for policy 0, policy_version 51542 (0.0009) -[2024-07-05 14:23:07,370][07115] Updated weights for policy 0, policy_version 51552 (0.0009) -[2024-07-05 14:23:08,912][03359] Fps is (10 sec: 36044.8, 60 sec: 36727.5, 300 sec: 33470.2). Total num frames: 402358272. Throughput: 0: 9130.4. Samples: 575064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:08,914][03359] Avg episode reward: [(0, '51.994')] -[2024-07-05 14:23:09,610][07115] Updated weights for policy 0, policy_version 51562 (0.0009) -[2024-07-05 14:23:11,790][07115] Updated weights for policy 0, policy_version 51572 (0.0013) -[2024-07-05 14:23:13,912][03359] Fps is (10 sec: 36863.8, 60 sec: 36727.4, 300 sec: 33751.0). Total num frames: 402546688. Throughput: 0: 9125.9. Samples: 630560. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:13,913][03359] Avg episode reward: [(0, '54.288')] -[2024-07-05 14:23:13,996][07115] Updated weights for policy 0, policy_version 51582 (0.0011) -[2024-07-05 14:23:16,245][07115] Updated weights for policy 0, policy_version 51592 (0.0015) -[2024-07-05 14:23:18,552][07115] Updated weights for policy 0, policy_version 51602 (0.0009) -[2024-07-05 14:23:18,912][03359] Fps is (10 sec: 36864.2, 60 sec: 36591.0, 300 sec: 33894.4). Total num frames: 402726912. Throughput: 0: 9119.4. Samples: 658136. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:18,913][03359] Avg episode reward: [(0, '53.501')] -[2024-07-05 14:23:21,043][07115] Updated weights for policy 0, policy_version 51612 (0.0014) -[2024-07-05 14:23:23,309][07115] Updated weights for policy 0, policy_version 51622 (0.0014) -[2024-07-05 14:23:23,912][03359] Fps is (10 sec: 36045.1, 60 sec: 36454.4, 300 sec: 34020.9). Total num frames: 402907136. Throughput: 0: 9047.6. Samples: 710852. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:23,914][03359] Avg episode reward: [(0, '54.358')] -[2024-07-05 14:23:25,510][07115] Updated weights for policy 0, policy_version 51632 (0.0010) -[2024-07-05 14:23:27,870][07115] Updated weights for policy 0, policy_version 51642 (0.0012) -[2024-07-05 14:23:28,912][03359] Fps is (10 sec: 35225.1, 60 sec: 36317.8, 300 sec: 34042.3). Total num frames: 403079168. Throughput: 0: 8999.9. Samples: 764432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:28,914][03359] Avg episode reward: [(0, '51.941')] -[2024-07-05 14:23:30,022][07115] Updated weights for policy 0, policy_version 51652 (0.0009) -[2024-07-05 14:23:32,253][07115] Updated weights for policy 0, policy_version 51662 (0.0008) -[2024-07-05 14:23:33,912][03359] Fps is (10 sec: 36044.8, 60 sec: 36181.3, 300 sec: 34234.0). Total num frames: 403267584. Throughput: 0: 8995.6. Samples: 792388. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:33,914][03359] Avg episode reward: [(0, '53.484')] -[2024-07-05 14:23:34,527][07115] Updated weights for policy 0, policy_version 51672 (0.0009) -[2024-07-05 14:23:36,719][07115] Updated weights for policy 0, policy_version 51682 (0.0013) -[2024-07-05 14:23:38,899][07115] Updated weights for policy 0, policy_version 51692 (0.0013) -[2024-07-05 14:23:38,912][03359] Fps is (10 sec: 37683.7, 60 sec: 36317.9, 300 sec: 34406.4). Total num frames: 403456000. Throughput: 0: 9010.3. Samples: 848104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:23:38,913][03359] Avg episode reward: [(0, '51.705')] -[2024-07-05 14:23:41,096][07115] Updated weights for policy 0, policy_version 51702 (0.0010) -[2024-07-05 14:23:43,307][07115] Updated weights for policy 0, policy_version 51712 (0.0009) -[2024-07-05 14:23:43,912][03359] Fps is (10 sec: 36863.7, 60 sec: 36181.3, 300 sec: 34484.4). Total num frames: 403636224. Throughput: 0: 9092.3. Samples: 903736. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:23:43,914][03359] Avg episode reward: [(0, '53.009')] -[2024-07-05 14:23:45,479][07115] Updated weights for policy 0, policy_version 51722 (0.0009) -[2024-07-05 14:23:47,825][07115] Updated weights for policy 0, policy_version 51732 (0.0013) -[2024-07-05 14:23:48,912][03359] Fps is (10 sec: 36044.6, 60 sec: 36181.4, 300 sec: 34555.4). Total num frames: 403816448. Throughput: 0: 9102.7. Samples: 930848. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:23:48,913][03359] Avg episode reward: [(0, '54.625')] -[2024-07-05 14:23:48,922][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051737_403824640.pth... -[2024-07-05 14:23:49,007][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000050723_395517952.pth -[2024-07-05 14:23:50,081][07115] Updated weights for policy 0, policy_version 51742 (0.0010) -[2024-07-05 14:23:52,328][07115] Updated weights for policy 0, policy_version 51752 (0.0011) -[2024-07-05 14:23:53,912][03359] Fps is (10 sec: 36044.5, 60 sec: 36181.2, 300 sec: 34620.1). Total num frames: 403996672. Throughput: 0: 9109.2. Samples: 984980. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:23:53,914][03359] Avg episode reward: [(0, '55.024')] -[2024-07-05 14:23:54,595][07115] Updated weights for policy 0, policy_version 51762 (0.0009) -[2024-07-05 14:23:56,800][07115] Updated weights for policy 0, policy_version 51772 (0.0010) -[2024-07-05 14:23:58,913][03359] Fps is (10 sec: 36861.1, 60 sec: 36453.9, 300 sec: 34747.5). Total num frames: 404185088. Throughput: 0: 9111.5. Samples: 1040584. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:23:58,914][03359] Avg episode reward: [(0, '53.333')] -[2024-07-05 14:23:58,993][07115] Updated weights for policy 0, policy_version 51782 (0.0012) -[2024-07-05 14:24:01,155][07115] Updated weights for policy 0, policy_version 51792 (0.0010) -[2024-07-05 14:24:03,360][07115] Updated weights for policy 0, policy_version 51802 (0.0012) -[2024-07-05 14:24:03,912][03359] Fps is (10 sec: 37683.3, 60 sec: 36590.9, 300 sec: 34865.1). Total num frames: 404373504. Throughput: 0: 9123.0. Samples: 1068672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:24:03,913][03359] Avg episode reward: [(0, '56.448')] -[2024-07-05 14:24:05,496][07115] Updated weights for policy 0, policy_version 51812 (0.0012) -[2024-07-05 14:24:07,689][07115] Updated weights for policy 0, policy_version 51822 (0.0012) -[2024-07-05 14:24:08,912][03359] Fps is (10 sec: 37685.3, 60 sec: 36727.3, 300 sec: 34973.5). Total num frames: 404561920. Throughput: 0: 9206.4. Samples: 1125144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:24:08,914][03359] Avg episode reward: [(0, '53.772')] -[2024-07-05 14:24:09,826][07115] Updated weights for policy 0, policy_version 51832 (0.0010) -[2024-07-05 14:24:11,935][07115] Updated weights for policy 0, policy_version 51842 (0.0011) -[2024-07-05 14:24:13,912][03359] Fps is (10 sec: 37683.7, 60 sec: 36727.5, 300 sec: 35073.9). Total num frames: 404750336. Throughput: 0: 9274.1. Samples: 1181764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:13,913][03359] Avg episode reward: [(0, '52.149')] -[2024-07-05 14:24:14,187][07115] Updated weights for policy 0, policy_version 51852 (0.0012) -[2024-07-05 14:24:16,331][07115] Updated weights for policy 0, policy_version 51862 (0.0010) -[2024-07-05 14:24:18,516][07115] Updated weights for policy 0, policy_version 51872 (0.0010) -[2024-07-05 14:24:18,912][03359] Fps is (10 sec: 37683.7, 60 sec: 36863.9, 300 sec: 35167.1). Total num frames: 404938752. Throughput: 0: 9290.4. Samples: 1210456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:18,914][03359] Avg episode reward: [(0, '53.294')] -[2024-07-05 14:24:20,718][07115] Updated weights for policy 0, policy_version 51882 (0.0014) -[2024-07-05 14:24:22,853][07115] Updated weights for policy 0, policy_version 51892 (0.0010) -[2024-07-05 14:24:23,913][03359] Fps is (10 sec: 37682.3, 60 sec: 37000.4, 300 sec: 35253.8). Total num frames: 405127168. Throughput: 0: 9308.4. Samples: 1266984. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:23,914][03359] Avg episode reward: [(0, '53.101')] -[2024-07-05 14:24:25,006][07115] Updated weights for policy 0, policy_version 51902 (0.0010) -[2024-07-05 14:24:27,189][07115] Updated weights for policy 0, policy_version 51912 (0.0009) -[2024-07-05 14:24:28,912][03359] Fps is (10 sec: 38502.8, 60 sec: 37410.2, 300 sec: 35389.5). Total num frames: 405323776. Throughput: 0: 9331.6. Samples: 1323656. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:28,913][03359] Avg episode reward: [(0, '54.220')] -[2024-07-05 14:24:29,350][07115] Updated weights for policy 0, policy_version 51922 (0.0012) -[2024-07-05 14:24:31,515][07115] Updated weights for policy 0, policy_version 51932 (0.0014) -[2024-07-05 14:24:33,663][07115] Updated weights for policy 0, policy_version 51942 (0.0011) -[2024-07-05 14:24:33,912][03359] Fps is (10 sec: 38503.3, 60 sec: 37410.1, 300 sec: 35463.4). Total num frames: 405512192. Throughput: 0: 9362.4. Samples: 1352156. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:33,913][03359] Avg episode reward: [(0, '51.213')] -[2024-07-05 14:24:35,841][07115] Updated weights for policy 0, policy_version 51952 (0.0013) -[2024-07-05 14:24:37,979][07115] Updated weights for policy 0, policy_version 51962 (0.0008) -[2024-07-05 14:24:38,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37410.1, 300 sec: 35532.8). Total num frames: 405700608. Throughput: 0: 9424.3. Samples: 1409072. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:38,913][03359] Avg episode reward: [(0, '50.596')] -[2024-07-05 14:24:40,173][07115] Updated weights for policy 0, policy_version 51972 (0.0009) -[2024-07-05 14:24:42,360][07115] Updated weights for policy 0, policy_version 51982 (0.0012) -[2024-07-05 14:24:43,912][03359] Fps is (10 sec: 36863.7, 60 sec: 37410.1, 300 sec: 35548.3). Total num frames: 405880832. Throughput: 0: 9438.6. Samples: 1465312. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:24:43,913][03359] Avg episode reward: [(0, '55.569')] -[2024-07-05 14:24:44,569][07115] Updated weights for policy 0, policy_version 51992 (0.0019) -[2024-07-05 14:24:46,725][07115] Updated weights for policy 0, policy_version 52002 (0.0010) -[2024-07-05 14:24:48,912][03359] Fps is (10 sec: 36863.2, 60 sec: 37546.5, 300 sec: 35611.1). Total num frames: 406069248. Throughput: 0: 9444.1. Samples: 1493656. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:24:48,914][03359] Avg episode reward: [(0, '54.836')] -[2024-07-05 14:24:48,930][07115] Updated weights for policy 0, policy_version 52012 (0.0011) -[2024-07-05 14:24:51,115][07115] Updated weights for policy 0, policy_version 52022 (0.0012) -[2024-07-05 14:24:53,256][07115] Updated weights for policy 0, policy_version 52032 (0.0010) -[2024-07-05 14:24:53,915][03359] Fps is (10 sec: 37677.7, 60 sec: 37682.3, 300 sec: 35670.0). Total num frames: 406257664. Throughput: 0: 9446.8. Samples: 1550264. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:24:53,925][03359] Avg episode reward: [(0, '54.112')] -[2024-07-05 14:24:55,469][07115] Updated weights for policy 0, policy_version 52042 (0.0010) -[2024-07-05 14:24:57,624][07115] Updated weights for policy 0, policy_version 52052 (0.0012) -[2024-07-05 14:24:58,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37683.6, 300 sec: 35726.2). Total num frames: 406446080. Throughput: 0: 9431.8. Samples: 1606196. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:24:58,913][03359] Avg episode reward: [(0, '53.292')] -[2024-07-05 14:24:59,822][07115] Updated weights for policy 0, policy_version 52062 (0.0009) -[2024-07-05 14:25:01,976][07115] Updated weights for policy 0, policy_version 52072 (0.0014) -[2024-07-05 14:25:03,912][03359] Fps is (10 sec: 37689.0, 60 sec: 37683.3, 300 sec: 35779.1). Total num frames: 406634496. Throughput: 0: 9426.6. Samples: 1634652. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:03,913][03359] Avg episode reward: [(0, '54.978')] -[2024-07-05 14:25:04,139][07115] Updated weights for policy 0, policy_version 52082 (0.0008) -[2024-07-05 14:25:06,414][07115] Updated weights for policy 0, policy_version 52092 (0.0015) -[2024-07-05 14:25:08,587][07115] Updated weights for policy 0, policy_version 52102 (0.0011) -[2024-07-05 14:25:08,913][03359] Fps is (10 sec: 37683.1, 60 sec: 37683.2, 300 sec: 35829.2). Total num frames: 406822912. Throughput: 0: 9406.5. Samples: 1690276. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:08,914][03359] Avg episode reward: [(0, '54.655')] -[2024-07-05 14:25:10,716][07115] Updated weights for policy 0, policy_version 52112 (0.0009) -[2024-07-05 14:25:12,910][07115] Updated weights for policy 0, policy_version 52122 (0.0010) -[2024-07-05 14:25:13,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37683.2, 300 sec: 35876.8). Total num frames: 407011328. Throughput: 0: 9412.9. Samples: 1747236. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:13,913][03359] Avg episode reward: [(0, '54.304')] -[2024-07-05 14:25:15,079][07115] Updated weights for policy 0, policy_version 52132 (0.0009) -[2024-07-05 14:25:17,270][07115] Updated weights for policy 0, policy_version 52142 (0.0009) -[2024-07-05 14:25:18,912][03359] Fps is (10 sec: 37683.8, 60 sec: 37683.2, 300 sec: 35921.9). Total num frames: 407199744. Throughput: 0: 9401.5. Samples: 1775224. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:18,913][03359] Avg episode reward: [(0, '56.090')] -[2024-07-05 14:25:19,442][07115] Updated weights for policy 0, policy_version 52152 (0.0010) -[2024-07-05 14:25:21,644][07115] Updated weights for policy 0, policy_version 52162 (0.0009) -[2024-07-05 14:25:23,828][07115] Updated weights for policy 0, policy_version 52172 (0.0009) -[2024-07-05 14:25:23,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37683.3, 300 sec: 35964.9). Total num frames: 407388160. Throughput: 0: 9384.4. Samples: 1831372. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:23,913][03359] Avg episode reward: [(0, '53.449')] -[2024-07-05 14:25:26,005][07115] Updated weights for policy 0, policy_version 52182 (0.0014) -[2024-07-05 14:25:28,163][07115] Updated weights for policy 0, policy_version 52192 (0.0017) -[2024-07-05 14:25:28,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37546.7, 300 sec: 36005.8). Total num frames: 407576576. Throughput: 0: 9393.9. Samples: 1888036. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:28,913][03359] Avg episode reward: [(0, '53.156')] -[2024-07-05 14:25:30,289][07115] Updated weights for policy 0, policy_version 52202 (0.0013) -[2024-07-05 14:25:32,389][07115] Updated weights for policy 0, policy_version 52212 (0.0009) -[2024-07-05 14:25:33,912][03359] Fps is (10 sec: 37682.7, 60 sec: 37546.6, 300 sec: 36044.8). Total num frames: 407764992. Throughput: 0: 9410.5. Samples: 1917128. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:33,914][03359] Avg episode reward: [(0, '53.386')] -[2024-07-05 14:25:34,588][07115] Updated weights for policy 0, policy_version 52222 (0.0016) -[2024-07-05 14:25:36,770][07115] Updated weights for policy 0, policy_version 52232 (0.0010) -[2024-07-05 14:25:38,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37546.7, 300 sec: 36082.0). Total num frames: 407953408. Throughput: 0: 9399.0. Samples: 1973204. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:38,913][03359] Avg episode reward: [(0, '52.276')] -[2024-07-05 14:25:38,986][07115] Updated weights for policy 0, policy_version 52242 (0.0009) -[2024-07-05 14:25:41,150][07115] Updated weights for policy 0, policy_version 52252 (0.0013) -[2024-07-05 14:25:43,348][07115] Updated weights for policy 0, policy_version 52262 (0.0012) -[2024-07-05 14:25:43,919][03359] Fps is (10 sec: 37659.3, 60 sec: 37679.2, 300 sec: 36116.6). Total num frames: 408141824. Throughput: 0: 9421.5. Samples: 2030224. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:43,921][03359] Avg episode reward: [(0, '50.337')] -[2024-07-05 14:25:45,510][07115] Updated weights for policy 0, policy_version 52272 (0.0011) -[2024-07-05 14:25:47,688][07115] Updated weights for policy 0, policy_version 52282 (0.0012) -[2024-07-05 14:25:48,915][03359] Fps is (10 sec: 37673.9, 60 sec: 37681.8, 300 sec: 36151.3). Total num frames: 408330240. Throughput: 0: 9413.0. Samples: 2058260. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:25:48,916][03359] Avg episode reward: [(0, '53.600')] -[2024-07-05 14:25:48,953][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000052288_408338432.pth... -[2024-07-05 14:25:49,035][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051272_400015360.pth -[2024-07-05 14:25:49,843][07115] Updated weights for policy 0, policy_version 52292 (0.0009) -[2024-07-05 14:25:52,076][07115] Updated weights for policy 0, policy_version 52302 (0.0010) -[2024-07-05 14:25:53,912][03359] Fps is (10 sec: 37707.4, 60 sec: 37684.1, 300 sec: 36184.2). Total num frames: 408518656. Throughput: 0: 9421.3. Samples: 2114232. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:25:53,914][03359] Avg episode reward: [(0, '52.570')] -[2024-07-05 14:25:54,256][07115] Updated weights for policy 0, policy_version 52312 (0.0014) -[2024-07-05 14:25:56,431][07115] Updated weights for policy 0, policy_version 52322 (0.0010) -[2024-07-05 14:25:58,601][07115] Updated weights for policy 0, policy_version 52332 (0.0012) -[2024-07-05 14:25:58,912][03359] Fps is (10 sec: 37692.4, 60 sec: 37683.3, 300 sec: 36215.5). Total num frames: 408707072. Throughput: 0: 9407.9. Samples: 2170592. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:25:58,913][03359] Avg episode reward: [(0, '49.463')] -[2024-07-05 14:26:00,817][07115] Updated weights for policy 0, policy_version 52342 (0.0014) -[2024-07-05 14:26:02,957][07115] Updated weights for policy 0, policy_version 52352 (0.0010) -[2024-07-05 14:26:03,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37683.2, 300 sec: 36245.4). Total num frames: 408895488. Throughput: 0: 9410.7. Samples: 2198704. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:26:03,913][03359] Avg episode reward: [(0, '54.847')] -[2024-07-05 14:26:05,143][07115] Updated weights for policy 0, policy_version 52362 (0.0009) -[2024-07-05 14:26:07,299][07115] Updated weights for policy 0, policy_version 52372 (0.0009) -[2024-07-05 14:26:08,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37683.3, 300 sec: 36274.2). Total num frames: 409083904. Throughput: 0: 9428.4. Samples: 2255652. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:26:08,913][03359] Avg episode reward: [(0, '54.024')] -[2024-07-05 14:26:09,467][07115] Updated weights for policy 0, policy_version 52382 (0.0009) -[2024-07-05 14:26:11,650][07115] Updated weights for policy 0, policy_version 52392 (0.0008) -[2024-07-05 14:26:13,861][07115] Updated weights for policy 0, policy_version 52402 (0.0009) -[2024-07-05 14:26:13,913][03359] Fps is (10 sec: 37681.7, 60 sec: 37682.9, 300 sec: 36301.8). Total num frames: 409272320. Throughput: 0: 9418.5. Samples: 2311872. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:26:13,914][03359] Avg episode reward: [(0, '52.171')] -[2024-07-05 14:26:15,958][07115] Updated weights for policy 0, policy_version 52412 (0.0009) -[2024-07-05 14:26:18,181][07115] Updated weights for policy 0, policy_version 52422 (0.0010) -[2024-07-05 14:26:18,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37683.2, 300 sec: 36328.4). Total num frames: 409460736. Throughput: 0: 9415.2. Samples: 2340812. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:26:18,913][03359] Avg episode reward: [(0, '52.642')] -[2024-07-05 14:26:20,299][07115] Updated weights for policy 0, policy_version 52432 (0.0010) -[2024-07-05 14:26:22,444][07115] Updated weights for policy 0, policy_version 52442 (0.0010) -[2024-07-05 14:26:23,912][03359] Fps is (10 sec: 37684.1, 60 sec: 37683.1, 300 sec: 36353.9). Total num frames: 409649152. Throughput: 0: 9418.9. Samples: 2397056. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:23,914][03359] Avg episode reward: [(0, '54.373')] -[2024-07-05 14:26:24,659][07115] Updated weights for policy 0, policy_version 52452 (0.0011) -[2024-07-05 14:26:26,865][07115] Updated weights for policy 0, policy_version 52462 (0.0009) -[2024-07-05 14:26:28,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37683.2, 300 sec: 36378.6). Total num frames: 409837568. Throughput: 0: 9404.7. Samples: 2453376. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:28,914][03359] Avg episode reward: [(0, '52.515')] -[2024-07-05 14:26:29,042][07115] Updated weights for policy 0, policy_version 52472 (0.0009) -[2024-07-05 14:26:31,245][07115] Updated weights for policy 0, policy_version 52482 (0.0012) -[2024-07-05 14:26:33,352][07115] Updated weights for policy 0, policy_version 52492 (0.0009) -[2024-07-05 14:26:33,912][03359] Fps is (10 sec: 37683.7, 60 sec: 37683.3, 300 sec: 36402.3). Total num frames: 410025984. Throughput: 0: 9408.6. Samples: 2481624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:33,913][03359] Avg episode reward: [(0, '51.959')] -[2024-07-05 14:26:35,507][07115] Updated weights for policy 0, policy_version 52502 (0.0017) -[2024-07-05 14:26:37,699][07115] Updated weights for policy 0, policy_version 52512 (0.0013) -[2024-07-05 14:26:38,921][03359] Fps is (10 sec: 37652.1, 60 sec: 37678.0, 300 sec: 36424.1). Total num frames: 410214400. Throughput: 0: 9416.9. Samples: 2538072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:38,922][03359] Avg episode reward: [(0, '54.476')] -[2024-07-05 14:26:39,865][07115] Updated weights for policy 0, policy_version 52522 (0.0010) -[2024-07-05 14:26:42,033][07115] Updated weights for policy 0, policy_version 52532 (0.0011) -[2024-07-05 14:26:43,915][03359] Fps is (10 sec: 37673.4, 60 sec: 37685.6, 300 sec: 36446.9). Total num frames: 410402816. Throughput: 0: 9436.9. Samples: 2595276. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:43,916][03359] Avg episode reward: [(0, '52.963')] -[2024-07-05 14:26:44,148][07115] Updated weights for policy 0, policy_version 52542 (0.0008) -[2024-07-05 14:26:46,381][07115] Updated weights for policy 0, policy_version 52552 (0.0013) -[2024-07-05 14:26:48,523][07115] Updated weights for policy 0, policy_version 52562 (0.0009) -[2024-07-05 14:26:48,912][03359] Fps is (10 sec: 37714.0, 60 sec: 37684.7, 300 sec: 36468.5). Total num frames: 410591232. Throughput: 0: 9443.5. Samples: 2623664. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:48,914][03359] Avg episode reward: [(0, '52.338')] -[2024-07-05 14:26:50,633][07115] Updated weights for policy 0, policy_version 52572 (0.0010) -[2024-07-05 14:26:52,853][07115] Updated weights for policy 0, policy_version 52582 (0.0013) -[2024-07-05 14:26:53,913][03359] Fps is (10 sec: 37691.5, 60 sec: 37683.0, 300 sec: 36489.1). Total num frames: 410779648. Throughput: 0: 9439.1. Samples: 2680416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:53,914][03359] Avg episode reward: [(0, '53.150')] -[2024-07-05 14:26:54,996][07115] Updated weights for policy 0, policy_version 52592 (0.0009) -[2024-07-05 14:26:57,155][07115] Updated weights for policy 0, policy_version 52602 (0.0009) -[2024-07-05 14:26:58,915][03359] Fps is (10 sec: 37677.4, 60 sec: 37682.2, 300 sec: 37127.6). Total num frames: 410968064. Throughput: 0: 9438.9. Samples: 2736632. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:26:58,925][03359] Avg episode reward: [(0, '51.835')] -[2024-07-05 14:26:59,394][07115] Updated weights for policy 0, policy_version 52612 (0.0009) -[2024-07-05 14:27:01,539][07115] Updated weights for policy 0, policy_version 52622 (0.0012) -[2024-07-05 14:27:03,730][07115] Updated weights for policy 0, policy_version 52632 (0.0009) -[2024-07-05 14:27:03,912][03359] Fps is (10 sec: 38504.0, 60 sec: 37819.7, 300 sec: 37322.2). Total num frames: 411164672. Throughput: 0: 9421.8. Samples: 2764792. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:03,913][03359] Avg episode reward: [(0, '53.838')] -[2024-07-05 14:27:05,901][07115] Updated weights for policy 0, policy_version 52642 (0.0010) -[2024-07-05 14:27:08,052][07115] Updated weights for policy 0, policy_version 52652 (0.0009) -[2024-07-05 14:27:08,912][03359] Fps is (10 sec: 37689.5, 60 sec: 37683.2, 300 sec: 37294.4). Total num frames: 411344896. Throughput: 0: 9443.4. Samples: 2822008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:08,913][03359] Avg episode reward: [(0, '53.607')] -[2024-07-05 14:27:10,227][07115] Updated weights for policy 0, policy_version 52662 (0.0012) -[2024-07-05 14:27:12,424][07115] Updated weights for policy 0, policy_version 52672 (0.0015) -[2024-07-05 14:27:13,916][03359] Fps is (10 sec: 36850.0, 60 sec: 37681.1, 300 sec: 37293.9). Total num frames: 411533312. Throughput: 0: 9438.2. Samples: 2878132. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:13,918][03359] Avg episode reward: [(0, '54.683')] -[2024-07-05 14:27:14,609][07115] Updated weights for policy 0, policy_version 52682 (0.0011) -[2024-07-05 14:27:16,747][07115] Updated weights for policy 0, policy_version 52692 (0.0010) -[2024-07-05 14:27:18,867][07115] Updated weights for policy 0, policy_version 52702 (0.0009) -[2024-07-05 14:27:18,912][03359] Fps is (10 sec: 38502.5, 60 sec: 37819.8, 300 sec: 37322.2). Total num frames: 411729920. Throughput: 0: 9432.7. Samples: 2906096. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:18,913][03359] Avg episode reward: [(0, '52.740')] -[2024-07-05 14:27:21,066][07115] Updated weights for policy 0, policy_version 52712 (0.0012) -[2024-07-05 14:27:23,282][07115] Updated weights for policy 0, policy_version 52722 (0.0009) -[2024-07-05 14:27:23,912][03359] Fps is (10 sec: 38517.2, 60 sec: 37819.9, 300 sec: 37350.0). Total num frames: 411918336. Throughput: 0: 9448.3. Samples: 2963168. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:23,913][03359] Avg episode reward: [(0, '51.994')] -[2024-07-05 14:27:25,478][07115] Updated weights for policy 0, policy_version 52732 (0.0010) -[2024-07-05 14:27:27,669][07115] Updated weights for policy 0, policy_version 52742 (0.0010) -[2024-07-05 14:27:28,913][03359] Fps is (10 sec: 36861.3, 60 sec: 37682.8, 300 sec: 37294.3). Total num frames: 412098560. Throughput: 0: 9415.7. Samples: 3018964. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:28,915][03359] Avg episode reward: [(0, '55.649')] -[2024-07-05 14:27:29,830][07115] Updated weights for policy 0, policy_version 52752 (0.0010) -[2024-07-05 14:27:32,014][07115] Updated weights for policy 0, policy_version 52762 (0.0016) -[2024-07-05 14:27:33,913][03359] Fps is (10 sec: 36860.9, 60 sec: 37682.7, 300 sec: 37322.1). Total num frames: 412286976. Throughput: 0: 9416.8. Samples: 3047428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:33,915][03359] Avg episode reward: [(0, '53.750')] -[2024-07-05 14:27:34,143][07115] Updated weights for policy 0, policy_version 52772 (0.0009) -[2024-07-05 14:27:36,328][07115] Updated weights for policy 0, policy_version 52782 (0.0010) -[2024-07-05 14:27:38,547][07115] Updated weights for policy 0, policy_version 52792 (0.0009) -[2024-07-05 14:27:38,912][03359] Fps is (10 sec: 37685.3, 60 sec: 37688.3, 300 sec: 37322.2). Total num frames: 412475392. Throughput: 0: 9412.6. Samples: 3103980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:38,916][03359] Avg episode reward: [(0, '55.818')] -[2024-07-05 14:27:40,714][07115] Updated weights for policy 0, policy_version 52802 (0.0009) -[2024-07-05 14:27:42,880][07115] Updated weights for policy 0, policy_version 52812 (0.0010) -[2024-07-05 14:27:43,912][03359] Fps is (10 sec: 38505.3, 60 sec: 37821.4, 300 sec: 37377.7). Total num frames: 412672000. Throughput: 0: 9432.5. Samples: 3161080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:43,914][03359] Avg episode reward: [(0, '52.741')] -[2024-07-05 14:27:45,053][07115] Updated weights for policy 0, policy_version 52822 (0.0012) -[2024-07-05 14:27:47,193][07115] Updated weights for policy 0, policy_version 52832 (0.0014) -[2024-07-05 14:27:48,913][03359] Fps is (10 sec: 37681.6, 60 sec: 37682.9, 300 sec: 37377.7). Total num frames: 412852224. Throughput: 0: 9426.1. Samples: 3188972. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:48,914][03359] Avg episode reward: [(0, '54.531')] -[2024-07-05 14:27:48,929][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000052840_412860416.pth... -[2024-07-05 14:27:49,005][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000051737_403824640.pth -[2024-07-05 14:27:49,418][07115] Updated weights for policy 0, policy_version 52842 (0.0012) -[2024-07-05 14:27:51,555][07115] Updated weights for policy 0, policy_version 52852 (0.0009) -[2024-07-05 14:27:53,777][07115] Updated weights for policy 0, policy_version 52862 (0.0013) -[2024-07-05 14:27:53,912][03359] Fps is (10 sec: 36864.2, 60 sec: 37683.5, 300 sec: 37433.3). Total num frames: 413040640. Throughput: 0: 9396.3. Samples: 3244844. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:53,913][03359] Avg episode reward: [(0, '52.153')] -[2024-07-05 14:27:55,919][07115] Updated weights for policy 0, policy_version 52872 (0.0011) -[2024-07-05 14:27:58,057][07115] Updated weights for policy 0, policy_version 52882 (0.0012) -[2024-07-05 14:27:58,912][03359] Fps is (10 sec: 37684.6, 60 sec: 37684.1, 300 sec: 37461.0). Total num frames: 413229056. Throughput: 0: 9408.7. Samples: 3301488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:27:58,914][03359] Avg episode reward: [(0, '50.433')] -[2024-07-05 14:28:00,289][07115] Updated weights for policy 0, policy_version 52892 (0.0009) -[2024-07-05 14:28:02,434][07115] Updated weights for policy 0, policy_version 52902 (0.0010) -[2024-07-05 14:28:03,912][03359] Fps is (10 sec: 37682.7, 60 sec: 37546.6, 300 sec: 37488.8). Total num frames: 413417472. Throughput: 0: 9425.1. Samples: 3330228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:03,914][03359] Avg episode reward: [(0, '51.841')] -[2024-07-05 14:28:04,627][07115] Updated weights for policy 0, policy_version 52912 (0.0012) -[2024-07-05 14:28:06,827][07115] Updated weights for policy 0, policy_version 52922 (0.0013) -[2024-07-05 14:28:08,912][03359] Fps is (10 sec: 37683.9, 60 sec: 37683.2, 300 sec: 37488.8). Total num frames: 413605888. Throughput: 0: 9390.7. Samples: 3385748. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:08,913][03359] Avg episode reward: [(0, '54.123')] -[2024-07-05 14:28:09,023][07115] Updated weights for policy 0, policy_version 52932 (0.0010) -[2024-07-05 14:28:11,230][07115] Updated weights for policy 0, policy_version 52942 (0.0011) -[2024-07-05 14:28:13,403][07115] Updated weights for policy 0, policy_version 52952 (0.0010) -[2024-07-05 14:28:13,912][03359] Fps is (10 sec: 37683.8, 60 sec: 37685.6, 300 sec: 37516.6). Total num frames: 413794304. Throughput: 0: 9414.9. Samples: 3442628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:13,913][03359] Avg episode reward: [(0, '55.345')] -[2024-07-05 14:28:15,585][07115] Updated weights for policy 0, policy_version 52962 (0.0009) -[2024-07-05 14:28:17,761][07115] Updated weights for policy 0, policy_version 52972 (0.0014) -[2024-07-05 14:28:18,912][03359] Fps is (10 sec: 37682.4, 60 sec: 37546.5, 300 sec: 37544.3). Total num frames: 413982720. Throughput: 0: 9395.9. Samples: 3470236. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:18,913][03359] Avg episode reward: [(0, '54.646')] -[2024-07-05 14:28:19,926][07115] Updated weights for policy 0, policy_version 52982 (0.0010) -[2024-07-05 14:28:22,150][07115] Updated weights for policy 0, policy_version 52992 (0.0014) -[2024-07-05 14:28:23,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37546.7, 300 sec: 37599.9). Total num frames: 414171136. Throughput: 0: 9389.0. Samples: 3526484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:23,913][03359] Avg episode reward: [(0, '53.514')] -[2024-07-05 14:28:24,302][07115] Updated weights for policy 0, policy_version 53002 (0.0013) -[2024-07-05 14:28:26,486][07115] Updated weights for policy 0, policy_version 53012 (0.0010) -[2024-07-05 14:28:28,682][07115] Updated weights for policy 0, policy_version 53022 (0.0008) -[2024-07-05 14:28:28,912][03359] Fps is (10 sec: 37683.7, 60 sec: 37683.6, 300 sec: 37599.9). Total num frames: 414359552. Throughput: 0: 9372.4. Samples: 3582840. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:28,913][03359] Avg episode reward: [(0, '54.376')] -[2024-07-05 14:28:30,852][07115] Updated weights for policy 0, policy_version 53032 (0.0013) -[2024-07-05 14:28:33,044][07115] Updated weights for policy 0, policy_version 53042 (0.0010) -[2024-07-05 14:28:33,913][03359] Fps is (10 sec: 36862.5, 60 sec: 37546.9, 300 sec: 37572.1). Total num frames: 414539776. Throughput: 0: 9382.7. Samples: 3611192. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:28:33,914][03359] Avg episode reward: [(0, '54.326')] -[2024-07-05 14:28:35,239][07115] Updated weights for policy 0, policy_version 53052 (0.0010) -[2024-07-05 14:28:37,385][07115] Updated weights for policy 0, policy_version 53062 (0.0013) -[2024-07-05 14:28:38,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37546.7, 300 sec: 37599.9). Total num frames: 414728192. Throughput: 0: 9405.0. Samples: 3668072. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:28:38,914][03359] Avg episode reward: [(0, '53.597')] -[2024-07-05 14:28:39,547][07115] Updated weights for policy 0, policy_version 53072 (0.0011) -[2024-07-05 14:28:41,668][07115] Updated weights for policy 0, policy_version 53082 (0.0009) -[2024-07-05 14:28:43,882][07115] Updated weights for policy 0, policy_version 53092 (0.0013) -[2024-07-05 14:28:43,912][03359] Fps is (10 sec: 38504.0, 60 sec: 37546.7, 300 sec: 37655.4). Total num frames: 414924800. Throughput: 0: 9401.6. Samples: 3724556. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:28:43,913][03359] Avg episode reward: [(0, '52.028')] -[2024-07-05 14:28:46,039][07115] Updated weights for policy 0, policy_version 53102 (0.0009) -[2024-07-05 14:28:48,192][07115] Updated weights for policy 0, policy_version 53112 (0.0016) -[2024-07-05 14:28:48,912][03359] Fps is (10 sec: 38502.7, 60 sec: 37683.5, 300 sec: 37683.2). Total num frames: 415113216. Throughput: 0: 9391.1. Samples: 3752828. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:28:48,913][03359] Avg episode reward: [(0, '53.113')] -[2024-07-05 14:28:50,320][07115] Updated weights for policy 0, policy_version 53122 (0.0015) -[2024-07-05 14:28:52,507][07115] Updated weights for policy 0, policy_version 53132 (0.0009) -[2024-07-05 14:28:53,912][03359] Fps is (10 sec: 37682.7, 60 sec: 37683.1, 300 sec: 37683.3). Total num frames: 415301632. Throughput: 0: 9417.7. Samples: 3809548. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:28:53,913][03359] Avg episode reward: [(0, '55.470')] -[2024-07-05 14:28:54,756][07115] Updated weights for policy 0, policy_version 53142 (0.0009) -[2024-07-05 14:28:56,914][07115] Updated weights for policy 0, policy_version 53152 (0.0009) -[2024-07-05 14:28:58,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37683.3, 300 sec: 37683.2). Total num frames: 415490048. Throughput: 0: 9398.1. Samples: 3865544. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:28:58,914][03359] Avg episode reward: [(0, '54.108')] -[2024-07-05 14:28:59,074][07115] Updated weights for policy 0, policy_version 53162 (0.0010) -[2024-07-05 14:29:01,242][07115] Updated weights for policy 0, policy_version 53172 (0.0010) -[2024-07-05 14:29:03,443][07115] Updated weights for policy 0, policy_version 53182 (0.0013) -[2024-07-05 14:29:03,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37683.2, 300 sec: 37683.2). Total num frames: 415678464. Throughput: 0: 9415.4. Samples: 3893928. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:29:03,913][03359] Avg episode reward: [(0, '54.748')] -[2024-07-05 14:29:05,611][07115] Updated weights for policy 0, policy_version 53192 (0.0009) -[2024-07-05 14:29:07,792][07115] Updated weights for policy 0, policy_version 53202 (0.0009) -[2024-07-05 14:29:08,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37683.2, 300 sec: 37683.2). Total num frames: 415866880. Throughput: 0: 9429.2. Samples: 3950800. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:08,913][03359] Avg episode reward: [(0, '53.086')] -[2024-07-05 14:29:09,898][07115] Updated weights for policy 0, policy_version 53212 (0.0010) -[2024-07-05 14:29:12,079][07115] Updated weights for policy 0, policy_version 53222 (0.0011) -[2024-07-05 14:29:13,913][03359] Fps is (10 sec: 37682.4, 60 sec: 37683.0, 300 sec: 37683.2). Total num frames: 416055296. Throughput: 0: 9437.3. Samples: 4007520. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:13,914][03359] Avg episode reward: [(0, '53.602')] -[2024-07-05 14:29:14,286][07115] Updated weights for policy 0, policy_version 53232 (0.0017) -[2024-07-05 14:29:16,462][07115] Updated weights for policy 0, policy_version 53242 (0.0009) -[2024-07-05 14:29:18,600][07115] Updated weights for policy 0, policy_version 53252 (0.0014) -[2024-07-05 14:29:18,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37683.3, 300 sec: 37683.2). Total num frames: 416243712. Throughput: 0: 9432.4. Samples: 4035644. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:18,913][03359] Avg episode reward: [(0, '54.914')] -[2024-07-05 14:29:20,787][07115] Updated weights for policy 0, policy_version 53262 (0.0010) -[2024-07-05 14:29:22,937][07115] Updated weights for policy 0, policy_version 53272 (0.0010) -[2024-07-05 14:29:23,913][03359] Fps is (10 sec: 37683.0, 60 sec: 37683.0, 300 sec: 37655.4). Total num frames: 416432128. Throughput: 0: 9425.3. Samples: 4092212. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:23,914][03359] Avg episode reward: [(0, '53.597')] -[2024-07-05 14:29:25,158][07115] Updated weights for policy 0, policy_version 53282 (0.0011) -[2024-07-05 14:29:27,306][07115] Updated weights for policy 0, policy_version 53292 (0.0009) -[2024-07-05 14:29:28,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37683.3, 300 sec: 37655.4). Total num frames: 416620544. Throughput: 0: 9428.5. Samples: 4148840. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:28,914][03359] Avg episode reward: [(0, '54.276')] -[2024-07-05 14:29:29,492][07115] Updated weights for policy 0, policy_version 53302 (0.0008) -[2024-07-05 14:29:31,689][07115] Updated weights for policy 0, policy_version 53312 (0.0009) -[2024-07-05 14:29:33,857][07115] Updated weights for policy 0, policy_version 53322 (0.0010) -[2024-07-05 14:29:33,912][03359] Fps is (10 sec: 37684.4, 60 sec: 37820.0, 300 sec: 37655.4). Total num frames: 416808960. Throughput: 0: 9423.1. Samples: 4176868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:33,913][03359] Avg episode reward: [(0, '55.641')] -[2024-07-05 14:29:36,025][07115] Updated weights for policy 0, policy_version 53332 (0.0012) -[2024-07-05 14:29:38,191][07115] Updated weights for policy 0, policy_version 53342 (0.0010) -[2024-07-05 14:29:38,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37819.8, 300 sec: 37683.2). Total num frames: 416997376. Throughput: 0: 9416.7. Samples: 4233300. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:29:38,913][03359] Avg episode reward: [(0, '55.778')] -[2024-07-05 14:29:40,365][07115] Updated weights for policy 0, policy_version 53352 (0.0013) -[2024-07-05 14:29:42,581][07115] Updated weights for policy 0, policy_version 53362 (0.0010) -[2024-07-05 14:29:43,912][03359] Fps is (10 sec: 37683.0, 60 sec: 37683.2, 300 sec: 37683.2). Total num frames: 417185792. Throughput: 0: 9428.3. Samples: 4289820. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:29:43,914][03359] Avg episode reward: [(0, '54.923')] -[2024-07-05 14:29:44,735][07115] Updated weights for policy 0, policy_version 53372 (0.0008) -[2024-07-05 14:29:46,969][07115] Updated weights for policy 0, policy_version 53382 (0.0012) -[2024-07-05 14:29:48,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37683.3, 300 sec: 37683.4). Total num frames: 417374208. Throughput: 0: 9421.3. Samples: 4317884. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:29:48,913][03359] Avg episode reward: [(0, '54.146')] -[2024-07-05 14:29:48,920][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000053391_417374208.pth... -[2024-07-05 14:29:48,999][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000052288_408338432.pth -[2024-07-05 14:29:49,131][07115] Updated weights for policy 0, policy_version 53392 (0.0009) -[2024-07-05 14:29:51,327][07115] Updated weights for policy 0, policy_version 53402 (0.0011) -[2024-07-05 14:29:53,470][07115] Updated weights for policy 0, policy_version 53412 (0.0013) -[2024-07-05 14:29:53,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37683.3, 300 sec: 37683.2). Total num frames: 417562624. Throughput: 0: 9409.2. Samples: 4374212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:29:53,914][03359] Avg episode reward: [(0, '55.554')] -[2024-07-05 14:29:55,612][07115] Updated weights for policy 0, policy_version 53422 (0.0009) -[2024-07-05 14:29:57,858][07115] Updated weights for policy 0, policy_version 53432 (0.0013) -[2024-07-05 14:29:58,915][03359] Fps is (10 sec: 36852.0, 60 sec: 37544.7, 300 sec: 37655.0). Total num frames: 417742848. Throughput: 0: 9386.5. Samples: 4429940. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:29:58,918][03359] Avg episode reward: [(0, '54.452')] -[2024-07-05 14:30:00,013][07115] Updated weights for policy 0, policy_version 53442 (0.0013) -[2024-07-05 14:30:02,284][07115] Updated weights for policy 0, policy_version 53452 (0.0009) -[2024-07-05 14:30:03,913][03359] Fps is (10 sec: 36863.0, 60 sec: 37546.5, 300 sec: 37655.4). Total num frames: 417931264. Throughput: 0: 9395.2. Samples: 4458432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:30:03,914][03359] Avg episode reward: [(0, '54.790')] -[2024-07-05 14:30:04,434][07115] Updated weights for policy 0, policy_version 53462 (0.0010) -[2024-07-05 14:30:06,618][07115] Updated weights for policy 0, policy_version 53472 (0.0012) -[2024-07-05 14:30:08,777][07115] Updated weights for policy 0, policy_version 53482 (0.0014) -[2024-07-05 14:30:08,912][03359] Fps is (10 sec: 37695.3, 60 sec: 37546.7, 300 sec: 37655.4). Total num frames: 418119680. Throughput: 0: 9383.6. Samples: 4514472. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:30:08,913][03359] Avg episode reward: [(0, '56.166')] -[2024-07-05 14:30:10,973][07115] Updated weights for policy 0, policy_version 53492 (0.0009) -[2024-07-05 14:30:13,148][07115] Updated weights for policy 0, policy_version 53502 (0.0015) -[2024-07-05 14:30:13,912][03359] Fps is (10 sec: 37684.3, 60 sec: 37546.8, 300 sec: 37655.4). Total num frames: 418308096. Throughput: 0: 9380.8. Samples: 4570976. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:30:13,913][03359] Avg episode reward: [(0, '55.733')] -[2024-07-05 14:30:15,336][07115] Updated weights for policy 0, policy_version 53512 (0.0013) -[2024-07-05 14:30:17,516][07115] Updated weights for policy 0, policy_version 53522 (0.0009) -[2024-07-05 14:30:18,912][03359] Fps is (10 sec: 37682.9, 60 sec: 37546.6, 300 sec: 37655.4). Total num frames: 418496512. Throughput: 0: 9380.0. Samples: 4598968. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:18,913][03359] Avg episode reward: [(0, '53.497')] -[2024-07-05 14:30:19,754][07115] Updated weights for policy 0, policy_version 53532 (0.0010) -[2024-07-05 14:30:21,925][07115] Updated weights for policy 0, policy_version 53542 (0.0009) -[2024-07-05 14:30:23,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37546.8, 300 sec: 37655.4). Total num frames: 418684928. Throughput: 0: 9385.2. Samples: 4655636. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:23,913][03359] Avg episode reward: [(0, '53.762')] -[2024-07-05 14:30:24,068][07115] Updated weights for policy 0, policy_version 53552 (0.0010) -[2024-07-05 14:30:26,238][07115] Updated weights for policy 0, policy_version 53562 (0.0012) -[2024-07-05 14:30:28,399][07115] Updated weights for policy 0, policy_version 53572 (0.0010) -[2024-07-05 14:30:28,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37546.7, 300 sec: 37655.4). Total num frames: 418873344. Throughput: 0: 9369.3. Samples: 4711436. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:28,913][03359] Avg episode reward: [(0, '57.407')] -[2024-07-05 14:30:28,918][07095] Saving new best policy, reward=57.407! -[2024-07-05 14:30:30,633][07115] Updated weights for policy 0, policy_version 53582 (0.0010) -[2024-07-05 14:30:32,832][07115] Updated weights for policy 0, policy_version 53592 (0.0010) -[2024-07-05 14:30:33,912][03359] Fps is (10 sec: 36864.0, 60 sec: 37410.1, 300 sec: 37627.7). Total num frames: 419053568. Throughput: 0: 9373.0. Samples: 4739672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:33,914][03359] Avg episode reward: [(0, '53.336')] -[2024-07-05 14:30:34,992][07115] Updated weights for policy 0, policy_version 53602 (0.0009) -[2024-07-05 14:30:37,206][07115] Updated weights for policy 0, policy_version 53612 (0.0011) -[2024-07-05 14:30:38,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37410.1, 300 sec: 37628.5). Total num frames: 419241984. Throughput: 0: 9368.5. Samples: 4795796. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:38,913][03359] Avg episode reward: [(0, '52.925')] -[2024-07-05 14:30:39,405][07115] Updated weights for policy 0, policy_version 53622 (0.0011) -[2024-07-05 14:30:41,570][07115] Updated weights for policy 0, policy_version 53632 (0.0014) -[2024-07-05 14:30:43,790][07115] Updated weights for policy 0, policy_version 53642 (0.0009) -[2024-07-05 14:30:43,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37410.1, 300 sec: 37628.0). Total num frames: 419430400. Throughput: 0: 9390.2. Samples: 4852468. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:43,913][03359] Avg episode reward: [(0, '55.262')] -[2024-07-05 14:30:45,954][07115] Updated weights for policy 0, policy_version 53652 (0.0009) -[2024-07-05 14:30:48,064][07115] Updated weights for policy 0, policy_version 53662 (0.0012) -[2024-07-05 14:30:48,912][03359] Fps is (10 sec: 37682.8, 60 sec: 37410.0, 300 sec: 37627.7). Total num frames: 419618816. Throughput: 0: 9384.8. Samples: 4880748. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:48,913][03359] Avg episode reward: [(0, '53.489')] -[2024-07-05 14:30:50,251][07115] Updated weights for policy 0, policy_version 53672 (0.0010) -[2024-07-05 14:30:52,420][07115] Updated weights for policy 0, policy_version 53682 (0.0009) -[2024-07-05 14:30:53,913][03359] Fps is (10 sec: 37682.3, 60 sec: 37410.0, 300 sec: 37627.6). Total num frames: 419807232. Throughput: 0: 9387.0. Samples: 4936888. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:53,914][03359] Avg episode reward: [(0, '54.155')] -[2024-07-05 14:30:54,575][07115] Updated weights for policy 0, policy_version 53692 (0.0012) -[2024-07-05 14:30:56,798][07115] Updated weights for policy 0, policy_version 53702 (0.0008) -[2024-07-05 14:30:58,915][03359] Fps is (10 sec: 37672.2, 60 sec: 37546.8, 300 sec: 37627.3). Total num frames: 419995648. Throughput: 0: 9384.0. Samples: 4993284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:30:58,917][03359] Avg episode reward: [(0, '53.735')] -[2024-07-05 14:30:58,954][07115] Updated weights for policy 0, policy_version 53712 (0.0012) -[2024-07-05 14:31:01,142][07115] Updated weights for policy 0, policy_version 53722 (0.0009) -[2024-07-05 14:31:03,382][07115] Updated weights for policy 0, policy_version 53732 (0.0009) -[2024-07-05 14:31:03,919][03359] Fps is (10 sec: 37660.1, 60 sec: 37542.8, 300 sec: 37626.8). Total num frames: 420184064. Throughput: 0: 9392.2. Samples: 5021676. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:03,920][03359] Avg episode reward: [(0, '53.558')] -[2024-07-05 14:31:05,578][07115] Updated weights for policy 0, policy_version 53742 (0.0009) -[2024-07-05 14:31:07,729][07115] Updated weights for policy 0, policy_version 53752 (0.0011) -[2024-07-05 14:31:08,912][03359] Fps is (10 sec: 37694.8, 60 sec: 37546.7, 300 sec: 37627.7). Total num frames: 420372480. Throughput: 0: 9382.1. Samples: 5077828. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:08,913][03359] Avg episode reward: [(0, '53.574')] -[2024-07-05 14:31:09,916][07115] Updated weights for policy 0, policy_version 53762 (0.0009) -[2024-07-05 14:31:12,047][07115] Updated weights for policy 0, policy_version 53772 (0.0009) -[2024-07-05 14:31:13,912][03359] Fps is (10 sec: 37707.3, 60 sec: 37546.7, 300 sec: 37627.7). Total num frames: 420560896. Throughput: 0: 9402.2. Samples: 5134536. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:13,913][03359] Avg episode reward: [(0, '53.451')] -[2024-07-05 14:31:14,200][07115] Updated weights for policy 0, policy_version 53782 (0.0009) -[2024-07-05 14:31:16,379][07115] Updated weights for policy 0, policy_version 53792 (0.0010) -[2024-07-05 14:31:18,546][07115] Updated weights for policy 0, policy_version 53802 (0.0009) -[2024-07-05 14:31:18,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37546.7, 300 sec: 37627.7). Total num frames: 420749312. Throughput: 0: 9395.7. Samples: 5162476. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:18,913][03359] Avg episode reward: [(0, '55.807')] -[2024-07-05 14:31:20,746][07115] Updated weights for policy 0, policy_version 53812 (0.0009) -[2024-07-05 14:31:22,931][07115] Updated weights for policy 0, policy_version 53822 (0.0010) -[2024-07-05 14:31:23,913][03359] Fps is (10 sec: 37682.5, 60 sec: 37546.6, 300 sec: 37627.6). Total num frames: 420937728. Throughput: 0: 9396.9. Samples: 5218660. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:23,914][03359] Avg episode reward: [(0, '51.639')] -[2024-07-05 14:31:25,108][07115] Updated weights for policy 0, policy_version 53832 (0.0009) -[2024-07-05 14:31:27,311][07115] Updated weights for policy 0, policy_version 53842 (0.0009) -[2024-07-05 14:31:28,912][03359] Fps is (10 sec: 37683.0, 60 sec: 37546.6, 300 sec: 37627.7). Total num frames: 421126144. Throughput: 0: 9405.0. Samples: 5275692. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:28,914][03359] Avg episode reward: [(0, '53.824')] -[2024-07-05 14:31:29,469][07115] Updated weights for policy 0, policy_version 53852 (0.0011) -[2024-07-05 14:31:31,646][07115] Updated weights for policy 0, policy_version 53862 (0.0009) -[2024-07-05 14:31:33,823][07115] Updated weights for policy 0, policy_version 53872 (0.0009) -[2024-07-05 14:31:33,912][03359] Fps is (10 sec: 37684.0, 60 sec: 37683.2, 300 sec: 37628.7). Total num frames: 421314560. Throughput: 0: 9394.5. Samples: 5303500. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:33,913][03359] Avg episode reward: [(0, '52.887')] -[2024-07-05 14:31:36,037][07115] Updated weights for policy 0, policy_version 53882 (0.0009) -[2024-07-05 14:31:38,214][07115] Updated weights for policy 0, policy_version 53892 (0.0009) -[2024-07-05 14:31:38,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37683.2, 300 sec: 37628.0). Total num frames: 421502976. Throughput: 0: 9405.5. Samples: 5360132. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:38,913][03359] Avg episode reward: [(0, '53.158')] -[2024-07-05 14:31:40,410][07115] Updated weights for policy 0, policy_version 53902 (0.0010) -[2024-07-05 14:31:42,571][07115] Updated weights for policy 0, policy_version 53912 (0.0010) -[2024-07-05 14:31:43,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37683.2, 300 sec: 37627.7). Total num frames: 421691392. Throughput: 0: 9385.7. Samples: 5415612. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:43,913][03359] Avg episode reward: [(0, '53.570')] -[2024-07-05 14:31:44,827][07115] Updated weights for policy 0, policy_version 53922 (0.0009) -[2024-07-05 14:31:47,030][07115] Updated weights for policy 0, policy_version 53932 (0.0009) -[2024-07-05 14:31:48,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37546.7, 300 sec: 37599.9). Total num frames: 421871616. Throughput: 0: 9374.3. Samples: 5443460. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:48,914][03359] Avg episode reward: [(0, '52.020')] -[2024-07-05 14:31:48,936][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000053941_421879808.pth... -[2024-07-05 14:31:49,021][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000052840_412860416.pth -[2024-07-05 14:31:49,201][07115] Updated weights for policy 0, policy_version 53942 (0.0009) -[2024-07-05 14:31:51,362][07115] Updated weights for policy 0, policy_version 53952 (0.0010) -[2024-07-05 14:31:53,507][07115] Updated weights for policy 0, policy_version 53962 (0.0009) -[2024-07-05 14:31:53,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37546.8, 300 sec: 37600.1). Total num frames: 422060032. Throughput: 0: 9384.3. Samples: 5500124. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:31:53,913][03359] Avg episode reward: [(0, '54.985')] -[2024-07-05 14:31:55,683][07115] Updated weights for policy 0, policy_version 53972 (0.0008) -[2024-07-05 14:31:57,884][07115] Updated weights for policy 0, policy_version 53982 (0.0009) -[2024-07-05 14:31:58,913][03359] Fps is (10 sec: 37682.3, 60 sec: 37548.4, 300 sec: 37572.1). Total num frames: 422248448. Throughput: 0: 9374.9. Samples: 5556408. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:31:58,914][03359] Avg episode reward: [(0, '54.679')] -[2024-07-05 14:32:00,055][07115] Updated weights for policy 0, policy_version 53992 (0.0009) -[2024-07-05 14:32:02,233][07115] Updated weights for policy 0, policy_version 54002 (0.0010) -[2024-07-05 14:32:03,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37550.7, 300 sec: 37599.9). Total num frames: 422436864. Throughput: 0: 9384.6. Samples: 5584784. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:32:03,914][03359] Avg episode reward: [(0, '55.497')] -[2024-07-05 14:32:04,475][07115] Updated weights for policy 0, policy_version 54012 (0.0009) -[2024-07-05 14:32:06,648][07115] Updated weights for policy 0, policy_version 54022 (0.0014) -[2024-07-05 14:32:08,801][07115] Updated weights for policy 0, policy_version 54032 (0.0009) -[2024-07-05 14:32:08,912][03359] Fps is (10 sec: 37684.3, 60 sec: 37546.6, 300 sec: 37600.4). Total num frames: 422625280. Throughput: 0: 9379.2. Samples: 5640720. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:32:08,913][03359] Avg episode reward: [(0, '56.203')] -[2024-07-05 14:32:10,993][07115] Updated weights for policy 0, policy_version 54042 (0.0012) -[2024-07-05 14:32:13,235][07115] Updated weights for policy 0, policy_version 54052 (0.0011) -[2024-07-05 14:32:13,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37546.7, 300 sec: 37572.1). Total num frames: 422813696. Throughput: 0: 9357.2. Samples: 5696764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:32:13,913][03359] Avg episode reward: [(0, '51.943')] -[2024-07-05 14:32:15,450][07115] Updated weights for policy 0, policy_version 54062 (0.0015) -[2024-07-05 14:32:17,623][07115] Updated weights for policy 0, policy_version 54072 (0.0013) -[2024-07-05 14:32:18,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37546.7, 300 sec: 37572.1). Total num frames: 423002112. Throughput: 0: 9365.4. Samples: 5724944. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:32:18,913][03359] Avg episode reward: [(0, '53.670')] -[2024-07-05 14:32:19,780][07115] Updated weights for policy 0, policy_version 54082 (0.0013) -[2024-07-05 14:32:21,947][07115] Updated weights for policy 0, policy_version 54092 (0.0016) -[2024-07-05 14:32:23,913][03359] Fps is (10 sec: 36860.8, 60 sec: 37409.8, 300 sec: 37572.1). Total num frames: 423182336. Throughput: 0: 9355.8. Samples: 5781152. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:32:23,925][03359] Avg episode reward: [(0, '53.074')] -[2024-07-05 14:32:24,118][07115] Updated weights for policy 0, policy_version 54102 (0.0010) -[2024-07-05 14:32:26,336][07115] Updated weights for policy 0, policy_version 54112 (0.0009) -[2024-07-05 14:32:28,522][07115] Updated weights for policy 0, policy_version 54122 (0.0014) -[2024-07-05 14:32:28,912][03359] Fps is (10 sec: 36863.5, 60 sec: 37410.1, 300 sec: 37572.2). Total num frames: 423370752. Throughput: 0: 9366.3. Samples: 5837096. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:28,913][03359] Avg episode reward: [(0, '57.232')] -[2024-07-05 14:32:30,663][07115] Updated weights for policy 0, policy_version 54132 (0.0009) -[2024-07-05 14:32:32,911][07115] Updated weights for policy 0, policy_version 54142 (0.0010) -[2024-07-05 14:32:33,912][03359] Fps is (10 sec: 37686.2, 60 sec: 37410.1, 300 sec: 37572.1). Total num frames: 423559168. Throughput: 0: 9383.1. Samples: 5865700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:33,913][03359] Avg episode reward: [(0, '52.893')] -[2024-07-05 14:32:35,055][07115] Updated weights for policy 0, policy_version 54152 (0.0012) -[2024-07-05 14:32:37,276][07115] Updated weights for policy 0, policy_version 54162 (0.0009) -[2024-07-05 14:32:38,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37410.1, 300 sec: 37544.3). Total num frames: 423747584. Throughput: 0: 9370.7. Samples: 5921808. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:38,914][03359] Avg episode reward: [(0, '51.770')] -[2024-07-05 14:32:39,432][07115] Updated weights for policy 0, policy_version 54172 (0.0010) -[2024-07-05 14:32:41,611][07115] Updated weights for policy 0, policy_version 54182 (0.0009) -[2024-07-05 14:32:43,818][07115] Updated weights for policy 0, policy_version 54192 (0.0009) -[2024-07-05 14:32:43,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37410.1, 300 sec: 37572.2). Total num frames: 423936000. Throughput: 0: 9371.6. Samples: 5978128. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:43,914][03359] Avg episode reward: [(0, '53.720')] -[2024-07-05 14:32:46,016][07115] Updated weights for policy 0, policy_version 54202 (0.0010) -[2024-07-05 14:32:48,139][07115] Updated weights for policy 0, policy_version 54212 (0.0009) -[2024-07-05 14:32:48,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37546.6, 300 sec: 37572.1). Total num frames: 424124416. Throughput: 0: 9355.1. Samples: 6005764. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:48,914][03359] Avg episode reward: [(0, '53.585')] -[2024-07-05 14:32:50,384][07115] Updated weights for policy 0, policy_version 54222 (0.0013) -[2024-07-05 14:32:52,570][07115] Updated weights for policy 0, policy_version 54232 (0.0012) -[2024-07-05 14:32:53,912][03359] Fps is (10 sec: 37682.8, 60 sec: 37546.6, 300 sec: 37572.1). Total num frames: 424312832. Throughput: 0: 9370.8. Samples: 6062408. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:53,914][03359] Avg episode reward: [(0, '53.810')] -[2024-07-05 14:32:54,747][07115] Updated weights for policy 0, policy_version 54242 (0.0010) -[2024-07-05 14:32:56,947][07115] Updated weights for policy 0, policy_version 54252 (0.0015) -[2024-07-05 14:32:58,912][03359] Fps is (10 sec: 37683.9, 60 sec: 37546.9, 300 sec: 37572.1). Total num frames: 424501248. Throughput: 0: 9378.9. Samples: 6118816. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:32:58,913][03359] Avg episode reward: [(0, '51.936')] -[2024-07-05 14:32:59,087][07115] Updated weights for policy 0, policy_version 54262 (0.0009) -[2024-07-05 14:33:01,244][07115] Updated weights for policy 0, policy_version 54272 (0.0009) -[2024-07-05 14:33:03,451][07115] Updated weights for policy 0, policy_version 54282 (0.0013) -[2024-07-05 14:33:03,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37546.6, 300 sec: 37572.1). Total num frames: 424689664. Throughput: 0: 9367.0. Samples: 6146460. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:03,913][03359] Avg episode reward: [(0, '53.642')] -[2024-07-05 14:33:05,638][07115] Updated weights for policy 0, policy_version 54292 (0.0011) -[2024-07-05 14:33:07,780][07115] Updated weights for policy 0, policy_version 54302 (0.0009) -[2024-07-05 14:33:08,913][03359] Fps is (10 sec: 36862.6, 60 sec: 37409.9, 300 sec: 37544.3). Total num frames: 424869888. Throughput: 0: 9379.0. Samples: 6203204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:08,914][03359] Avg episode reward: [(0, '56.486')] -[2024-07-05 14:33:09,969][07115] Updated weights for policy 0, policy_version 54312 (0.0014) -[2024-07-05 14:33:12,192][07115] Updated weights for policy 0, policy_version 54322 (0.0012) -[2024-07-05 14:33:13,919][03359] Fps is (10 sec: 36842.5, 60 sec: 37406.4, 300 sec: 37543.6). Total num frames: 425058304. Throughput: 0: 9377.5. Samples: 6259136. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:13,921][03359] Avg episode reward: [(0, '54.125')] -[2024-07-05 14:33:14,415][07115] Updated weights for policy 0, policy_version 54332 (0.0009) -[2024-07-05 14:33:16,642][07115] Updated weights for policy 0, policy_version 54342 (0.0010) -[2024-07-05 14:33:18,834][07115] Updated weights for policy 0, policy_version 54352 (0.0010) -[2024-07-05 14:33:18,917][03359] Fps is (10 sec: 37667.4, 60 sec: 37407.3, 300 sec: 37543.8). Total num frames: 425246720. Throughput: 0: 9361.0. Samples: 6286988. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:18,930][03359] Avg episode reward: [(0, '56.130')] -[2024-07-05 14:33:21,031][07115] Updated weights for policy 0, policy_version 54362 (0.0010) -[2024-07-05 14:33:23,195][07115] Updated weights for policy 0, policy_version 54372 (0.0012) -[2024-07-05 14:33:23,912][03359] Fps is (10 sec: 37705.2, 60 sec: 37547.2, 300 sec: 37544.4). Total num frames: 425435136. Throughput: 0: 9363.5. Samples: 6343164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:23,913][03359] Avg episode reward: [(0, '54.144')] -[2024-07-05 14:33:25,372][07115] Updated weights for policy 0, policy_version 54382 (0.0012) -[2024-07-05 14:33:27,572][07115] Updated weights for policy 0, policy_version 54392 (0.0013) -[2024-07-05 14:33:28,912][03359] Fps is (10 sec: 37700.4, 60 sec: 37546.7, 300 sec: 37572.2). Total num frames: 425623552. Throughput: 0: 9349.8. Samples: 6398868. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:28,913][03359] Avg episode reward: [(0, '53.422')] -[2024-07-05 14:33:29,799][07115] Updated weights for policy 0, policy_version 54402 (0.0011) -[2024-07-05 14:33:32,032][07115] Updated weights for policy 0, policy_version 54412 (0.0008) -[2024-07-05 14:33:33,913][03359] Fps is (10 sec: 36860.1, 60 sec: 37409.5, 300 sec: 37544.2). Total num frames: 425803776. Throughput: 0: 9348.3. Samples: 6426448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:33,926][03359] Avg episode reward: [(0, '54.556')] -[2024-07-05 14:33:34,236][07115] Updated weights for policy 0, policy_version 54422 (0.0009) -[2024-07-05 14:33:36,399][07115] Updated weights for policy 0, policy_version 54432 (0.0009) -[2024-07-05 14:33:38,598][07115] Updated weights for policy 0, policy_version 54442 (0.0009) -[2024-07-05 14:33:38,913][03359] Fps is (10 sec: 36863.0, 60 sec: 37410.0, 300 sec: 37516.5). Total num frames: 425992192. Throughput: 0: 9335.3. Samples: 6482496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:38,914][03359] Avg episode reward: [(0, '55.951')] -[2024-07-05 14:33:40,740][07115] Updated weights for policy 0, policy_version 54452 (0.0010) -[2024-07-05 14:33:42,965][07115] Updated weights for policy 0, policy_version 54462 (0.0014) -[2024-07-05 14:33:43,912][03359] Fps is (10 sec: 37687.4, 60 sec: 37410.1, 300 sec: 37516.6). Total num frames: 426180608. Throughput: 0: 9329.4. Samples: 6538640. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:43,913][03359] Avg episode reward: [(0, '54.027')] -[2024-07-05 14:33:45,172][07115] Updated weights for policy 0, policy_version 54472 (0.0010) -[2024-07-05 14:33:47,382][07115] Updated weights for policy 0, policy_version 54482 (0.0011) -[2024-07-05 14:33:48,912][03359] Fps is (10 sec: 36865.0, 60 sec: 37273.7, 300 sec: 37488.8). Total num frames: 426360832. Throughput: 0: 9334.0. Samples: 6566488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:48,913][03359] Avg episode reward: [(0, '53.403')] -[2024-07-05 14:33:48,936][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000054489_426369024.pth... -[2024-07-05 14:33:49,020][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000053391_417374208.pth -[2024-07-05 14:33:49,601][07115] Updated weights for policy 0, policy_version 54492 (0.0019) -[2024-07-05 14:33:51,810][07115] Updated weights for policy 0, policy_version 54502 (0.0009) -[2024-07-05 14:33:53,912][03359] Fps is (10 sec: 36863.5, 60 sec: 37273.6, 300 sec: 37488.8). Total num frames: 426549248. Throughput: 0: 9317.8. Samples: 6622504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:53,914][03359] Avg episode reward: [(0, '53.992')] -[2024-07-05 14:33:54,003][07115] Updated weights for policy 0, policy_version 54512 (0.0009) -[2024-07-05 14:33:56,164][07115] Updated weights for policy 0, policy_version 54522 (0.0009) -[2024-07-05 14:33:58,335][07115] Updated weights for policy 0, policy_version 54532 (0.0010) -[2024-07-05 14:33:58,912][03359] Fps is (10 sec: 37682.8, 60 sec: 37273.5, 300 sec: 37488.8). Total num frames: 426737664. Throughput: 0: 9333.3. Samples: 6679080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:33:58,914][03359] Avg episode reward: [(0, '53.421')] -[2024-07-05 14:34:00,492][07115] Updated weights for policy 0, policy_version 54542 (0.0010) -[2024-07-05 14:34:02,686][07115] Updated weights for policy 0, policy_version 54552 (0.0010) -[2024-07-05 14:34:03,912][03359] Fps is (10 sec: 37683.6, 60 sec: 37273.6, 300 sec: 37488.8). Total num frames: 426926080. Throughput: 0: 9341.5. Samples: 6707312. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:34:03,914][03359] Avg episode reward: [(0, '55.635')] -[2024-07-05 14:34:04,899][07115] Updated weights for policy 0, policy_version 54562 (0.0013) -[2024-07-05 14:34:07,109][07115] Updated weights for policy 0, policy_version 54572 (0.0008) -[2024-07-05 14:34:08,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37410.3, 300 sec: 37488.8). Total num frames: 427114496. Throughput: 0: 9327.5. Samples: 6762900. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:08,913][03359] Avg episode reward: [(0, '53.831')] -[2024-07-05 14:34:09,310][07115] Updated weights for policy 0, policy_version 54582 (0.0014) -[2024-07-05 14:34:11,479][07115] Updated weights for policy 0, policy_version 54592 (0.0009) -[2024-07-05 14:34:13,713][07115] Updated weights for policy 0, policy_version 54602 (0.0013) -[2024-07-05 14:34:13,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37277.2, 300 sec: 37461.0). Total num frames: 427294720. Throughput: 0: 9329.9. Samples: 6818716. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:13,913][03359] Avg episode reward: [(0, '55.015')] -[2024-07-05 14:34:15,896][07115] Updated weights for policy 0, policy_version 54612 (0.0010) -[2024-07-05 14:34:18,084][07115] Updated weights for policy 0, policy_version 54622 (0.0011) -[2024-07-05 14:34:18,918][03359] Fps is (10 sec: 36843.9, 60 sec: 37273.0, 300 sec: 37460.4). Total num frames: 427483136. Throughput: 0: 9351.2. Samples: 6847292. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:18,919][03359] Avg episode reward: [(0, '56.066')] -[2024-07-05 14:34:20,286][07115] Updated weights for policy 0, policy_version 54632 (0.0009) -[2024-07-05 14:34:22,506][07115] Updated weights for policy 0, policy_version 54642 (0.0009) -[2024-07-05 14:34:23,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37273.6, 300 sec: 37461.0). Total num frames: 427671552. Throughput: 0: 9334.4. Samples: 6902540. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:23,913][03359] Avg episode reward: [(0, '53.930')] -[2024-07-05 14:34:24,678][07115] Updated weights for policy 0, policy_version 54652 (0.0009) -[2024-07-05 14:34:26,894][07115] Updated weights for policy 0, policy_version 54662 (0.0009) -[2024-07-05 14:34:28,912][03359] Fps is (10 sec: 37703.7, 60 sec: 37273.6, 300 sec: 37461.0). Total num frames: 427859968. Throughput: 0: 9327.0. Samples: 6958356. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:28,913][03359] Avg episode reward: [(0, '55.149')] -[2024-07-05 14:34:29,101][07115] Updated weights for policy 0, policy_version 54672 (0.0011) -[2024-07-05 14:34:31,283][07115] Updated weights for policy 0, policy_version 54682 (0.0009) -[2024-07-05 14:34:33,491][07115] Updated weights for policy 0, policy_version 54692 (0.0009) -[2024-07-05 14:34:33,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37274.2, 300 sec: 37433.3). Total num frames: 428040192. Throughput: 0: 9322.7. Samples: 6986012. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:33,914][03359] Avg episode reward: [(0, '53.851')] -[2024-07-05 14:34:35,707][07115] Updated weights for policy 0, policy_version 54702 (0.0009) -[2024-07-05 14:34:37,873][07115] Updated weights for policy 0, policy_version 54712 (0.0010) -[2024-07-05 14:34:38,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37273.7, 300 sec: 37433.3). Total num frames: 428228608. Throughput: 0: 9330.7. Samples: 7042384. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:34:38,914][03359] Avg episode reward: [(0, '53.091')] -[2024-07-05 14:34:40,094][07115] Updated weights for policy 0, policy_version 54722 (0.0009) -[2024-07-05 14:34:42,254][07115] Updated weights for policy 0, policy_version 54732 (0.0009) -[2024-07-05 14:34:43,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37273.6, 300 sec: 37433.3). Total num frames: 428417024. Throughput: 0: 9326.4. Samples: 7098768. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:34:43,914][03359] Avg episode reward: [(0, '56.273')] -[2024-07-05 14:34:44,460][07115] Updated weights for policy 0, policy_version 54742 (0.0012) -[2024-07-05 14:34:46,625][07115] Updated weights for policy 0, policy_version 54752 (0.0009) -[2024-07-05 14:34:48,783][07115] Updated weights for policy 0, policy_version 54762 (0.0009) -[2024-07-05 14:34:48,912][03359] Fps is (10 sec: 37683.6, 60 sec: 37410.1, 300 sec: 37433.3). Total num frames: 428605440. Throughput: 0: 9325.1. Samples: 7126940. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:34:48,913][03359] Avg episode reward: [(0, '55.357')] -[2024-07-05 14:34:50,978][07115] Updated weights for policy 0, policy_version 54772 (0.0009) -[2024-07-05 14:34:53,171][07115] Updated weights for policy 0, policy_version 54782 (0.0010) -[2024-07-05 14:34:53,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37410.2, 300 sec: 37461.5). Total num frames: 428793856. Throughput: 0: 9330.4. Samples: 7182768. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:34:53,913][03359] Avg episode reward: [(0, '54.840')] -[2024-07-05 14:34:55,360][07115] Updated weights for policy 0, policy_version 54792 (0.0010) -[2024-07-05 14:34:57,578][07115] Updated weights for policy 0, policy_version 54802 (0.0013) -[2024-07-05 14:34:58,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37410.2, 300 sec: 37461.1). Total num frames: 428982272. Throughput: 0: 9328.9. Samples: 7238516. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:34:58,913][03359] Avg episode reward: [(0, '51.963')] -[2024-07-05 14:34:59,787][07115] Updated weights for policy 0, policy_version 54812 (0.0011) -[2024-07-05 14:35:02,019][07115] Updated weights for policy 0, policy_version 54822 (0.0015) -[2024-07-05 14:35:03,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37273.6, 300 sec: 37433.3). Total num frames: 429162496. Throughput: 0: 9315.4. Samples: 7266432. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:35:03,914][03359] Avg episode reward: [(0, '54.001')] -[2024-07-05 14:35:04,194][07115] Updated weights for policy 0, policy_version 54832 (0.0010) -[2024-07-05 14:35:06,349][07115] Updated weights for policy 0, policy_version 54842 (0.0013) -[2024-07-05 14:35:08,576][07115] Updated weights for policy 0, policy_version 54852 (0.0013) -[2024-07-05 14:35:08,912][03359] Fps is (10 sec: 36863.6, 60 sec: 37273.6, 300 sec: 37433.3). Total num frames: 429350912. Throughput: 0: 9333.3. Samples: 7322540. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:35:08,913][03359] Avg episode reward: [(0, '52.511')] -[2024-07-05 14:35:10,758][07115] Updated weights for policy 0, policy_version 54862 (0.0011) -[2024-07-05 14:35:12,939][07115] Updated weights for policy 0, policy_version 54872 (0.0010) -[2024-07-05 14:35:13,912][03359] Fps is (10 sec: 37682.7, 60 sec: 37410.1, 300 sec: 37433.3). Total num frames: 429539328. Throughput: 0: 9341.5. Samples: 7378724. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:35:13,913][03359] Avg episode reward: [(0, '52.063')] -[2024-07-05 14:35:15,112][07115] Updated weights for policy 0, policy_version 54882 (0.0009) -[2024-07-05 14:35:17,267][07115] Updated weights for policy 0, policy_version 54892 (0.0011) -[2024-07-05 14:35:18,913][03359] Fps is (10 sec: 37682.5, 60 sec: 37413.4, 300 sec: 37433.2). Total num frames: 429727744. Throughput: 0: 9361.5. Samples: 7407284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:18,913][03359] Avg episode reward: [(0, '53.789')] -[2024-07-05 14:35:19,428][07115] Updated weights for policy 0, policy_version 54902 (0.0013) -[2024-07-05 14:35:21,636][07115] Updated weights for policy 0, policy_version 54912 (0.0009) -[2024-07-05 14:35:23,814][07115] Updated weights for policy 0, policy_version 54922 (0.0009) -[2024-07-05 14:35:23,912][03359] Fps is (10 sec: 37683.6, 60 sec: 37410.1, 300 sec: 37433.3). Total num frames: 429916160. Throughput: 0: 9360.1. Samples: 7463588. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:23,913][03359] Avg episode reward: [(0, '53.425')] -[2024-07-05 14:35:26,013][07115] Updated weights for policy 0, policy_version 54932 (0.0009) -[2024-07-05 14:35:28,172][07115] Updated weights for policy 0, policy_version 54942 (0.0009) -[2024-07-05 14:35:28,912][03359] Fps is (10 sec: 37684.0, 60 sec: 37410.1, 300 sec: 37461.0). Total num frames: 430104576. Throughput: 0: 9352.2. Samples: 7519616. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:28,913][03359] Avg episode reward: [(0, '53.118')] -[2024-07-05 14:35:30,415][07115] Updated weights for policy 0, policy_version 54952 (0.0013) -[2024-07-05 14:35:32,575][07115] Updated weights for policy 0, policy_version 54962 (0.0009) -[2024-07-05 14:35:33,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37546.7, 300 sec: 37461.0). Total num frames: 430292992. Throughput: 0: 9342.2. Samples: 7547340. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:33,914][03359] Avg episode reward: [(0, '52.032')] -[2024-07-05 14:35:34,769][07115] Updated weights for policy 0, policy_version 54972 (0.0012) -[2024-07-05 14:35:36,991][07115] Updated weights for policy 0, policy_version 54982 (0.0012) -[2024-07-05 14:35:38,920][03359] Fps is (10 sec: 36835.3, 60 sec: 37405.3, 300 sec: 37432.3). Total num frames: 430473216. Throughput: 0: 9345.4. Samples: 7603384. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:38,922][03359] Avg episode reward: [(0, '53.547')] -[2024-07-05 14:35:39,234][07115] Updated weights for policy 0, policy_version 54992 (0.0009) -[2024-07-05 14:35:41,416][07115] Updated weights for policy 0, policy_version 55002 (0.0011) -[2024-07-05 14:35:43,601][07115] Updated weights for policy 0, policy_version 55012 (0.0011) -[2024-07-05 14:35:43,912][03359] Fps is (10 sec: 36864.0, 60 sec: 37410.2, 300 sec: 37433.3). Total num frames: 430661632. Throughput: 0: 9353.1. Samples: 7659404. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:43,914][03359] Avg episode reward: [(0, '55.070')] -[2024-07-05 14:35:45,798][07115] Updated weights for policy 0, policy_version 55022 (0.0011) -[2024-07-05 14:35:47,970][07115] Updated weights for policy 0, policy_version 55032 (0.0013) -[2024-07-05 14:35:48,912][03359] Fps is (10 sec: 37713.0, 60 sec: 37410.2, 300 sec: 37433.3). Total num frames: 430850048. Throughput: 0: 9348.2. Samples: 7687100. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:35:48,913][03359] Avg episode reward: [(0, '52.171')] -[2024-07-05 14:35:48,917][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000055036_430850048.pth... -[2024-07-05 14:35:49,002][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000053941_421879808.pth -[2024-07-05 14:35:50,138][07115] Updated weights for policy 0, policy_version 55042 (0.0010) -[2024-07-05 14:35:52,425][07115] Updated weights for policy 0, policy_version 55052 (0.0012) -[2024-07-05 14:35:53,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37410.1, 300 sec: 37433.7). Total num frames: 431038464. Throughput: 0: 9353.4. Samples: 7743440. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:35:53,914][03359] Avg episode reward: [(0, '51.584')] -[2024-07-05 14:35:54,597][07115] Updated weights for policy 0, policy_version 55062 (0.0009) -[2024-07-05 14:35:56,726][07115] Updated weights for policy 0, policy_version 55072 (0.0012) -[2024-07-05 14:35:58,872][07115] Updated weights for policy 0, policy_version 55082 (0.0012) -[2024-07-05 14:35:58,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37410.2, 300 sec: 37434.1). Total num frames: 431226880. Throughput: 0: 9358.8. Samples: 7799868. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:35:58,913][03359] Avg episode reward: [(0, '54.213')] -[2024-07-05 14:36:01,083][07115] Updated weights for policy 0, policy_version 55092 (0.0009) -[2024-07-05 14:36:03,231][07115] Updated weights for policy 0, policy_version 55102 (0.0011) -[2024-07-05 14:36:03,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 431407104. Throughput: 0: 9346.9. Samples: 7827892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:36:03,913][03359] Avg episode reward: [(0, '52.036')] -[2024-07-05 14:36:05,472][07115] Updated weights for policy 0, policy_version 55112 (0.0010) -[2024-07-05 14:36:07,651][07115] Updated weights for policy 0, policy_version 55122 (0.0009) -[2024-07-05 14:36:08,919][03359] Fps is (10 sec: 36843.0, 60 sec: 37406.7, 300 sec: 37404.8). Total num frames: 431595520. Throughput: 0: 9336.3. Samples: 7883776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:36:08,932][03359] Avg episode reward: [(0, '53.780')] -[2024-07-05 14:36:09,852][07115] Updated weights for policy 0, policy_version 55132 (0.0010) -[2024-07-05 14:36:12,030][07115] Updated weights for policy 0, policy_version 55142 (0.0010) -[2024-07-05 14:36:13,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37410.2, 300 sec: 37405.5). Total num frames: 431783936. Throughput: 0: 9339.9. Samples: 7939912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:36:13,914][03359] Avg episode reward: [(0, '53.174')] -[2024-07-05 14:36:14,229][07115] Updated weights for policy 0, policy_version 55152 (0.0009) -[2024-07-05 14:36:16,395][07115] Updated weights for policy 0, policy_version 55162 (0.0011) -[2024-07-05 14:36:18,614][07115] Updated weights for policy 0, policy_version 55172 (0.0010) -[2024-07-05 14:36:18,912][03359] Fps is (10 sec: 37704.5, 60 sec: 37410.3, 300 sec: 37405.5). Total num frames: 431972352. Throughput: 0: 9347.8. Samples: 7967992. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:36:18,913][03359] Avg episode reward: [(0, '51.947')] -[2024-07-05 14:36:20,816][07115] Updated weights for policy 0, policy_version 55182 (0.0009) -[2024-07-05 14:36:23,002][07115] Updated weights for policy 0, policy_version 55192 (0.0012) -[2024-07-05 14:36:23,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37410.2, 300 sec: 37405.5). Total num frames: 432160768. Throughput: 0: 9352.3. Samples: 8024164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 14:36:23,914][03359] Avg episode reward: [(0, '52.539')] -[2024-07-05 14:36:25,193][07115] Updated weights for policy 0, policy_version 55202 (0.0009) -[2024-07-05 14:36:27,372][07115] Updated weights for policy 0, policy_version 55212 (0.0012) -[2024-07-05 14:36:28,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37410.2, 300 sec: 37405.5). Total num frames: 432349184. Throughput: 0: 9359.2. Samples: 8080568. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:28,913][03359] Avg episode reward: [(0, '53.136')] -[2024-07-05 14:36:29,555][07115] Updated weights for policy 0, policy_version 55222 (0.0012) -[2024-07-05 14:36:31,701][07115] Updated weights for policy 0, policy_version 55232 (0.0015) -[2024-07-05 14:36:33,865][07115] Updated weights for policy 0, policy_version 55242 (0.0009) -[2024-07-05 14:36:33,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 432537600. Throughput: 0: 9359.4. Samples: 8108272. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:33,913][03359] Avg episode reward: [(0, '51.326')] -[2024-07-05 14:36:36,085][07115] Updated weights for policy 0, policy_version 55252 (0.0009) -[2024-07-05 14:36:38,248][07115] Updated weights for policy 0, policy_version 55262 (0.0012) -[2024-07-05 14:36:38,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37415.0, 300 sec: 37377.7). Total num frames: 432717824. Throughput: 0: 9374.7. Samples: 8165304. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:38,914][03359] Avg episode reward: [(0, '54.033')] -[2024-07-05 14:36:40,402][07115] Updated weights for policy 0, policy_version 55272 (0.0013) -[2024-07-05 14:36:42,644][07115] Updated weights for policy 0, policy_version 55282 (0.0009) -[2024-07-05 14:36:43,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 432906240. Throughput: 0: 9365.1. Samples: 8221300. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:43,914][03359] Avg episode reward: [(0, '55.029')] -[2024-07-05 14:36:44,821][07115] Updated weights for policy 0, policy_version 55292 (0.0010) -[2024-07-05 14:36:47,008][07115] Updated weights for policy 0, policy_version 55302 (0.0009) -[2024-07-05 14:36:48,912][03359] Fps is (10 sec: 37682.7, 60 sec: 37410.0, 300 sec: 37405.5). Total num frames: 433094656. Throughput: 0: 9364.5. Samples: 8249296. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:48,914][03359] Avg episode reward: [(0, '55.055')] -[2024-07-05 14:36:49,235][07115] Updated weights for policy 0, policy_version 55312 (0.0009) -[2024-07-05 14:36:51,374][07115] Updated weights for policy 0, policy_version 55322 (0.0010) -[2024-07-05 14:36:53,546][07115] Updated weights for policy 0, policy_version 55332 (0.0009) -[2024-07-05 14:36:53,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 433283072. Throughput: 0: 9374.8. Samples: 8305588. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:53,913][03359] Avg episode reward: [(0, '54.060')] -[2024-07-05 14:36:55,791][07115] Updated weights for policy 0, policy_version 55342 (0.0010) -[2024-07-05 14:36:57,963][07115] Updated weights for policy 0, policy_version 55352 (0.0009) -[2024-07-05 14:36:58,912][03359] Fps is (10 sec: 37683.6, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 433471488. Throughput: 0: 9371.9. Samples: 8361648. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:36:58,914][03359] Avg episode reward: [(0, '56.008')] -[2024-07-05 14:37:00,174][07115] Updated weights for policy 0, policy_version 55362 (0.0009) -[2024-07-05 14:37:02,382][07115] Updated weights for policy 0, policy_version 55372 (0.0010) -[2024-07-05 14:37:03,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37546.7, 300 sec: 37405.5). Total num frames: 433659904. Throughput: 0: 9358.8. Samples: 8389136. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:37:03,913][03359] Avg episode reward: [(0, '55.148')] -[2024-07-05 14:37:04,551][07115] Updated weights for policy 0, policy_version 55382 (0.0010) -[2024-07-05 14:37:06,714][07115] Updated weights for policy 0, policy_version 55392 (0.0009) -[2024-07-05 14:37:08,888][07115] Updated weights for policy 0, policy_version 55402 (0.0016) -[2024-07-05 14:37:08,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37550.2, 300 sec: 37405.5). Total num frames: 433848320. Throughput: 0: 9365.2. Samples: 8445600. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:37:08,913][03359] Avg episode reward: [(0, '52.296')] -[2024-07-05 14:37:11,082][07115] Updated weights for policy 0, policy_version 55412 (0.0011) -[2024-07-05 14:37:13,278][07115] Updated weights for policy 0, policy_version 55422 (0.0010) -[2024-07-05 14:37:13,915][03359] Fps is (10 sec: 36854.3, 60 sec: 37408.5, 300 sec: 37377.4). Total num frames: 434028544. Throughput: 0: 9349.5. Samples: 8501320. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:37:13,916][03359] Avg episode reward: [(0, '54.271')] -[2024-07-05 14:37:15,482][07115] Updated weights for policy 0, policy_version 55432 (0.0009) -[2024-07-05 14:37:17,715][07115] Updated weights for policy 0, policy_version 55442 (0.0010) -[2024-07-05 14:37:18,913][03359] Fps is (10 sec: 36863.0, 60 sec: 37410.0, 300 sec: 37405.6). Total num frames: 434216960. Throughput: 0: 9367.8. Samples: 8529824. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:37:18,914][03359] Avg episode reward: [(0, '53.145')] -[2024-07-05 14:37:19,906][07115] Updated weights for policy 0, policy_version 55452 (0.0009) -[2024-07-05 14:37:22,069][07115] Updated weights for policy 0, policy_version 55462 (0.0010) -[2024-07-05 14:37:23,912][03359] Fps is (10 sec: 37693.2, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 434405376. Throughput: 0: 9350.6. Samples: 8586080. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:37:23,914][03359] Avg episode reward: [(0, '55.236')] -[2024-07-05 14:37:24,225][07115] Updated weights for policy 0, policy_version 55472 (0.0013) -[2024-07-05 14:37:26,448][07115] Updated weights for policy 0, policy_version 55482 (0.0009) -[2024-07-05 14:37:28,606][07115] Updated weights for policy 0, policy_version 55492 (0.0009) -[2024-07-05 14:37:28,912][03359] Fps is (10 sec: 37684.2, 60 sec: 37410.2, 300 sec: 37405.5). Total num frames: 434593792. Throughput: 0: 9356.7. Samples: 8642352. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:37:28,914][03359] Avg episode reward: [(0, '52.459')] -[2024-07-05 14:37:30,786][07115] Updated weights for policy 0, policy_version 55502 (0.0013) -[2024-07-05 14:37:32,955][07115] Updated weights for policy 0, policy_version 55512 (0.0009) -[2024-07-05 14:37:33,912][03359] Fps is (10 sec: 37683.0, 60 sec: 37410.1, 300 sec: 37405.5). Total num frames: 434782208. Throughput: 0: 9357.8. Samples: 8670396. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:37:33,914][03359] Avg episode reward: [(0, '53.322')] -[2024-07-05 14:37:35,183][07115] Updated weights for policy 0, policy_version 55522 (0.0010) -[2024-07-05 14:37:37,362][07115] Updated weights for policy 0, policy_version 55532 (0.0009) -[2024-07-05 14:37:38,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37546.7, 300 sec: 37405.5). Total num frames: 434970624. Throughput: 0: 9360.1. Samples: 8726792. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:37:38,913][03359] Avg episode reward: [(0, '54.098')] -[2024-07-05 14:37:39,567][07115] Updated weights for policy 0, policy_version 55542 (0.0011) -[2024-07-05 14:37:41,795][07115] Updated weights for policy 0, policy_version 55552 (0.0012) -[2024-07-05 14:37:43,918][03359] Fps is (10 sec: 36842.9, 60 sec: 37406.6, 300 sec: 37377.0). Total num frames: 435150848. Throughput: 0: 9340.8. Samples: 8782036. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:37:43,919][03359] Avg episode reward: [(0, '56.756')] -[2024-07-05 14:37:43,969][07115] Updated weights for policy 0, policy_version 55562 (0.0014) -[2024-07-05 14:37:46,170][07115] Updated weights for policy 0, policy_version 55572 (0.0009) -[2024-07-05 14:37:48,344][07115] Updated weights for policy 0, policy_version 55582 (0.0009) -[2024-07-05 14:37:48,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37410.2, 300 sec: 37377.7). Total num frames: 435339264. Throughput: 0: 9364.1. Samples: 8810520. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:37:48,914][03359] Avg episode reward: [(0, '52.591')] -[2024-07-05 14:37:48,920][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000055584_435339264.pth... -[2024-07-05 14:37:49,004][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000054489_426369024.pth -[2024-07-05 14:37:50,585][07115] Updated weights for policy 0, policy_version 55592 (0.0009) -[2024-07-05 14:37:52,783][07115] Updated weights for policy 0, policy_version 55602 (0.0009) -[2024-07-05 14:37:53,912][03359] Fps is (10 sec: 37705.0, 60 sec: 37410.1, 300 sec: 37377.7). Total num frames: 435527680. Throughput: 0: 9346.0. Samples: 8866172. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:37:53,914][03359] Avg episode reward: [(0, '54.519')] -[2024-07-05 14:37:55,003][07115] Updated weights for policy 0, policy_version 55612 (0.0015) -[2024-07-05 14:37:57,187][07115] Updated weights for policy 0, policy_version 55622 (0.0008) -[2024-07-05 14:37:58,912][03359] Fps is (10 sec: 36864.1, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 435707904. Throughput: 0: 9341.9. Samples: 8921680. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:37:58,914][03359] Avg episode reward: [(0, '54.911')] -[2024-07-05 14:37:59,351][07115] Updated weights for policy 0, policy_version 55632 (0.0009) -[2024-07-05 14:38:01,564][07115] Updated weights for policy 0, policy_version 55642 (0.0010) -[2024-07-05 14:38:03,809][07115] Updated weights for policy 0, policy_version 55652 (0.0010) -[2024-07-05 14:38:03,912][03359] Fps is (10 sec: 36863.5, 60 sec: 37273.5, 300 sec: 37377.8). Total num frames: 435896320. Throughput: 0: 9332.6. Samples: 8949792. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:03,914][03359] Avg episode reward: [(0, '54.333')] -[2024-07-05 14:38:05,987][07115] Updated weights for policy 0, policy_version 55662 (0.0010) -[2024-07-05 14:38:08,198][07115] Updated weights for policy 0, policy_version 55672 (0.0009) -[2024-07-05 14:38:08,912][03359] Fps is (10 sec: 37683.4, 60 sec: 37273.6, 300 sec: 37378.5). Total num frames: 436084736. Throughput: 0: 9319.7. Samples: 9005464. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:08,913][03359] Avg episode reward: [(0, '54.142')] -[2024-07-05 14:38:10,421][07115] Updated weights for policy 0, policy_version 55682 (0.0009) -[2024-07-05 14:38:12,622][07115] Updated weights for policy 0, policy_version 55692 (0.0013) -[2024-07-05 14:38:13,912][03359] Fps is (10 sec: 36865.0, 60 sec: 37275.3, 300 sec: 37350.6). Total num frames: 436264960. Throughput: 0: 9300.5. Samples: 9060872. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:13,914][03359] Avg episode reward: [(0, '52.506')] -[2024-07-05 14:38:14,799][07115] Updated weights for policy 0, policy_version 55702 (0.0009) -[2024-07-05 14:38:16,979][07115] Updated weights for policy 0, policy_version 55712 (0.0013) -[2024-07-05 14:38:18,912][03359] Fps is (10 sec: 36863.6, 60 sec: 37273.7, 300 sec: 37350.0). Total num frames: 436453376. Throughput: 0: 9309.4. Samples: 9089320. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:18,914][03359] Avg episode reward: [(0, '52.617')] -[2024-07-05 14:38:19,196][07115] Updated weights for policy 0, policy_version 55722 (0.0010) -[2024-07-05 14:38:21,369][07115] Updated weights for policy 0, policy_version 55732 (0.0009) -[2024-07-05 14:38:23,545][07115] Updated weights for policy 0, policy_version 55742 (0.0008) -[2024-07-05 14:38:23,912][03359] Fps is (10 sec: 37682.3, 60 sec: 37273.5, 300 sec: 37350.0). Total num frames: 436641792. Throughput: 0: 9291.8. Samples: 9144924. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:23,914][03359] Avg episode reward: [(0, '54.031')] -[2024-07-05 14:38:25,795][07115] Updated weights for policy 0, policy_version 55752 (0.0015) -[2024-07-05 14:38:28,006][07115] Updated weights for policy 0, policy_version 55762 (0.0009) -[2024-07-05 14:38:28,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37273.5, 300 sec: 37377.9). Total num frames: 436830208. Throughput: 0: 9307.9. Samples: 9200840. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:28,913][03359] Avg episode reward: [(0, '53.614')] -[2024-07-05 14:38:30,182][07115] Updated weights for policy 0, policy_version 55772 (0.0013) -[2024-07-05 14:38:32,390][07115] Updated weights for policy 0, policy_version 55782 (0.0009) -[2024-07-05 14:38:33,913][03359] Fps is (10 sec: 36862.1, 60 sec: 37136.7, 300 sec: 37349.9). Total num frames: 437010432. Throughput: 0: 9289.3. Samples: 9228544. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:33,914][03359] Avg episode reward: [(0, '54.376')] -[2024-07-05 14:38:34,628][07115] Updated weights for policy 0, policy_version 55792 (0.0009) -[2024-07-05 14:38:36,846][07115] Updated weights for policy 0, policy_version 55802 (0.0010) -[2024-07-05 14:38:38,912][03359] Fps is (10 sec: 36864.3, 60 sec: 37137.1, 300 sec: 37350.0). Total num frames: 437198848. Throughput: 0: 9290.4. Samples: 9284240. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:38,913][03359] Avg episode reward: [(0, '53.596')] -[2024-07-05 14:38:39,044][07115] Updated weights for policy 0, policy_version 55812 (0.0010) -[2024-07-05 14:38:41,229][07115] Updated weights for policy 0, policy_version 55822 (0.0009) -[2024-07-05 14:38:43,445][07115] Updated weights for policy 0, policy_version 55832 (0.0010) -[2024-07-05 14:38:43,912][03359] Fps is (10 sec: 37685.7, 60 sec: 37277.2, 300 sec: 37377.7). Total num frames: 437387264. Throughput: 0: 9295.2. Samples: 9339964. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:43,913][03359] Avg episode reward: [(0, '53.743')] -[2024-07-05 14:38:45,685][07115] Updated weights for policy 0, policy_version 55842 (0.0009) -[2024-07-05 14:38:47,914][07115] Updated weights for policy 0, policy_version 55852 (0.0009) -[2024-07-05 14:38:48,912][03359] Fps is (10 sec: 36863.8, 60 sec: 37137.1, 300 sec: 37350.0). Total num frames: 437567488. Throughput: 0: 9282.8. Samples: 9367516. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:48,914][03359] Avg episode reward: [(0, '57.179')] -[2024-07-05 14:38:50,145][07115] Updated weights for policy 0, policy_version 55862 (0.0009) -[2024-07-05 14:38:52,324][07115] Updated weights for policy 0, policy_version 55872 (0.0009) -[2024-07-05 14:38:53,912][03359] Fps is (10 sec: 36863.6, 60 sec: 37137.0, 300 sec: 37350.0). Total num frames: 437755904. Throughput: 0: 9293.5. Samples: 9423672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:53,914][03359] Avg episode reward: [(0, '54.473')] -[2024-07-05 14:38:54,500][07115] Updated weights for policy 0, policy_version 55882 (0.0011) -[2024-07-05 14:38:56,660][07115] Updated weights for policy 0, policy_version 55892 (0.0010) -[2024-07-05 14:38:58,816][07115] Updated weights for policy 0, policy_version 55902 (0.0010) -[2024-07-05 14:38:58,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 437944320. Throughput: 0: 9307.5. Samples: 9479712. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:38:58,913][03359] Avg episode reward: [(0, '52.469')] -[2024-07-05 14:39:01,016][07115] Updated weights for policy 0, policy_version 55912 (0.0012) -[2024-07-05 14:39:03,252][07115] Updated weights for policy 0, policy_version 55922 (0.0010) -[2024-07-05 14:39:03,913][03359] Fps is (10 sec: 36862.9, 60 sec: 37136.9, 300 sec: 37322.2). Total num frames: 438124544. Throughput: 0: 9295.0. Samples: 9507600. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:03,914][03359] Avg episode reward: [(0, '53.321')] -[2024-07-05 14:39:05,419][07115] Updated weights for policy 0, policy_version 55932 (0.0009) -[2024-07-05 14:39:07,638][07115] Updated weights for policy 0, policy_version 55942 (0.0009) -[2024-07-05 14:39:08,920][03359] Fps is (10 sec: 36844.8, 60 sec: 37133.8, 300 sec: 37349.3). Total num frames: 438312960. Throughput: 0: 9296.5. Samples: 9563312. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:08,939][03359] Avg episode reward: [(0, '53.324')] -[2024-07-05 14:39:09,839][07115] Updated weights for policy 0, policy_version 55952 (0.0013) -[2024-07-05 14:39:12,033][07115] Updated weights for policy 0, policy_version 55962 (0.0010) -[2024-07-05 14:39:13,912][03359] Fps is (10 sec: 37684.7, 60 sec: 37273.5, 300 sec: 37350.7). Total num frames: 438501376. Throughput: 0: 9299.7. Samples: 9619324. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:13,913][03359] Avg episode reward: [(0, '53.852')] -[2024-07-05 14:39:14,267][07115] Updated weights for policy 0, policy_version 55972 (0.0013) -[2024-07-05 14:39:16,466][07115] Updated weights for policy 0, policy_version 55982 (0.0011) -[2024-07-05 14:39:18,678][07115] Updated weights for policy 0, policy_version 55992 (0.0011) -[2024-07-05 14:39:18,912][03359] Fps is (10 sec: 37702.5, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 438689792. Throughput: 0: 9302.3. Samples: 9647144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:18,914][03359] Avg episode reward: [(0, '52.802')] -[2024-07-05 14:39:20,863][07115] Updated weights for policy 0, policy_version 56002 (0.0011) -[2024-07-05 14:39:23,028][07115] Updated weights for policy 0, policy_version 56012 (0.0008) -[2024-07-05 14:39:23,912][03359] Fps is (10 sec: 36863.0, 60 sec: 37137.0, 300 sec: 37322.2). Total num frames: 438870016. Throughput: 0: 9309.7. Samples: 9703180. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:23,913][03359] Avg episode reward: [(0, '50.946')] -[2024-07-05 14:39:25,245][07115] Updated weights for policy 0, policy_version 56022 (0.0010) -[2024-07-05 14:39:27,476][07115] Updated weights for policy 0, policy_version 56032 (0.0009) -[2024-07-05 14:39:28,912][03359] Fps is (10 sec: 36864.4, 60 sec: 37137.1, 300 sec: 37350.0). Total num frames: 439058432. Throughput: 0: 9316.9. Samples: 9759224. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:28,925][03359] Avg episode reward: [(0, '56.312')] -[2024-07-05 14:39:29,607][07115] Updated weights for policy 0, policy_version 56042 (0.0010) -[2024-07-05 14:39:31,822][07115] Updated weights for policy 0, policy_version 56052 (0.0009) -[2024-07-05 14:39:33,912][03359] Fps is (10 sec: 37684.2, 60 sec: 37274.0, 300 sec: 37350.0). Total num frames: 439246848. Throughput: 0: 9328.6. Samples: 9787300. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:33,913][03359] Avg episode reward: [(0, '54.705')] -[2024-07-05 14:39:34,053][07115] Updated weights for policy 0, policy_version 56062 (0.0009) -[2024-07-05 14:39:36,274][07115] Updated weights for policy 0, policy_version 56072 (0.0009) -[2024-07-05 14:39:38,436][07115] Updated weights for policy 0, policy_version 56082 (0.0009) -[2024-07-05 14:39:38,912][03359] Fps is (10 sec: 37683.1, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 439435264. Throughput: 0: 9314.1. Samples: 9842808. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:38,913][03359] Avg episode reward: [(0, '54.753')] -[2024-07-05 14:39:40,646][07115] Updated weights for policy 0, policy_version 56092 (0.0010) -[2024-07-05 14:39:42,848][07115] Updated weights for policy 0, policy_version 56102 (0.0009) -[2024-07-05 14:39:43,913][03359] Fps is (10 sec: 36863.7, 60 sec: 37137.0, 300 sec: 37322.2). Total num frames: 439615488. Throughput: 0: 9300.1. Samples: 9898216. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:43,915][03359] Avg episode reward: [(0, '55.142')] -[2024-07-05 14:39:45,050][07115] Updated weights for policy 0, policy_version 56112 (0.0009) -[2024-07-05 14:39:47,245][07115] Updated weights for policy 0, policy_version 56122 (0.0010) -[2024-07-05 14:39:48,912][03359] Fps is (10 sec: 36864.0, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 439803904. Throughput: 0: 9313.1. Samples: 9926688. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:39:48,914][03359] Avg episode reward: [(0, '53.380')] -[2024-07-05 14:39:48,919][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000056129_439803904.pth... -[2024-07-05 14:39:49,015][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000055036_430850048.pth -[2024-07-05 14:39:49,495][07115] Updated weights for policy 0, policy_version 56132 (0.0009) -[2024-07-05 14:39:51,667][07115] Updated weights for policy 0, policy_version 56142 (0.0009) -[2024-07-05 14:39:53,875][07115] Updated weights for policy 0, policy_version 56152 (0.0009) -[2024-07-05 14:39:53,912][03359] Fps is (10 sec: 37683.5, 60 sec: 37273.7, 300 sec: 37322.2). Total num frames: 439992320. Throughput: 0: 9310.0. Samples: 9982212. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:39:53,913][03359] Avg episode reward: [(0, '56.148')] -[2024-07-05 14:39:56,130][07115] Updated weights for policy 0, policy_version 56162 (0.0010) -[2024-07-05 14:39:58,318][07115] Updated weights for policy 0, policy_version 56172 (0.0015) -[2024-07-05 14:39:58,912][03359] Fps is (10 sec: 37683.2, 60 sec: 37273.6, 300 sec: 37350.0). Total num frames: 440180736. Throughput: 0: 9302.9. Samples: 10037956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:39:58,913][03359] Avg episode reward: [(0, '55.810')] -[2024-07-05 14:40:00,472][07115] Updated weights for policy 0, policy_version 56182 (0.0013) -[2024-07-05 14:40:02,619][07115] Updated weights for policy 0, policy_version 56192 (0.0010) -[2024-07-05 14:40:03,913][03359] Fps is (10 sec: 36862.7, 60 sec: 37273.6, 300 sec: 37322.2). Total num frames: 440360960. Throughput: 0: 9315.5. Samples: 10066344. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:40:03,913][03359] Avg episode reward: [(0, '54.675')] -[2024-07-05 14:40:04,812][07115] Updated weights for policy 0, policy_version 56202 (0.0010) -[2024-07-05 14:40:07,039][07115] Updated weights for policy 0, policy_version 56212 (0.0010) -[2024-07-05 14:40:08,913][03359] Fps is (10 sec: 36863.2, 60 sec: 37276.7, 300 sec: 37322.2). Total num frames: 440549376. Throughput: 0: 9310.3. Samples: 10122144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:40:08,914][03359] Avg episode reward: [(0, '54.931')] -[2024-07-05 14:40:09,247][07115] Updated weights for policy 0, policy_version 56222 (0.0013) -[2024-07-05 14:40:11,409][07115] Updated weights for policy 0, policy_version 56232 (0.0009) -[2024-07-05 14:40:13,699][07115] Updated weights for policy 0, policy_version 56242 (0.0015) -[2024-07-05 14:40:13,912][03359] Fps is (10 sec: 36865.5, 60 sec: 37137.1, 300 sec: 37294.5). Total num frames: 440729600. Throughput: 0: 9293.4. Samples: 10177428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:40:13,913][03359] Avg episode reward: [(0, '53.943')] -[2024-07-05 14:40:15,985][07115] Updated weights for policy 0, policy_version 56252 (0.0011) -[2024-07-05 14:40:18,106][07115] Updated weights for policy 0, policy_version 56262 (0.0014) -[2024-07-05 14:40:18,912][03359] Fps is (10 sec: 36864.4, 60 sec: 37137.1, 300 sec: 37294.4). Total num frames: 440918016. Throughput: 0: 9281.3. Samples: 10204960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:40:18,914][03359] Avg episode reward: [(0, '55.801')] -[2024-07-05 14:40:20,282][07115] Updated weights for policy 0, policy_version 56272 (0.0014) -[2024-07-05 14:40:22,395][07115] Updated weights for policy 0, policy_version 56282 (0.0009) -[2024-07-05 14:40:23,913][03359] Fps is (10 sec: 38501.4, 60 sec: 37410.2, 300 sec: 37322.2). Total num frames: 441114624. Throughput: 0: 9326.1. Samples: 10262484. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:40:23,914][03359] Avg episode reward: [(0, '53.129')] -[2024-07-05 14:40:24,537][07115] Updated weights for policy 0, policy_version 56292 (0.0009) -[2024-07-05 14:40:26,664][07115] Updated weights for policy 0, policy_version 56302 (0.0008) -[2024-07-05 14:40:28,738][07115] Updated weights for policy 0, policy_version 56312 (0.0008) -[2024-07-05 14:40:28,912][03359] Fps is (10 sec: 38502.6, 60 sec: 37410.1, 300 sec: 37322.2). Total num frames: 441303040. Throughput: 0: 9366.8. Samples: 10319720. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:28,913][03359] Avg episode reward: [(0, '56.036')] -[2024-07-05 14:40:30,934][07115] Updated weights for policy 0, policy_version 56322 (0.0012) -[2024-07-05 14:40:33,038][07115] Updated weights for policy 0, policy_version 56332 (0.0010) -[2024-07-05 14:40:33,912][03359] Fps is (10 sec: 37684.0, 60 sec: 37410.1, 300 sec: 37351.0). Total num frames: 441491456. Throughput: 0: 9373.0. Samples: 10348472. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:33,913][03359] Avg episode reward: [(0, '54.728')] -[2024-07-05 14:40:35,230][07115] Updated weights for policy 0, policy_version 56342 (0.0012) -[2024-07-05 14:40:37,372][07115] Updated weights for policy 0, policy_version 56352 (0.0010) -[2024-07-05 14:40:38,912][03359] Fps is (10 sec: 38502.6, 60 sec: 37546.7, 300 sec: 37377.7). Total num frames: 441688064. Throughput: 0: 9422.8. Samples: 10406240. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:38,913][03359] Avg episode reward: [(0, '54.185')] -[2024-07-05 14:40:39,523][07115] Updated weights for policy 0, policy_version 56362 (0.0009) -[2024-07-05 14:40:41,639][07115] Updated weights for policy 0, policy_version 56372 (0.0009) -[2024-07-05 14:40:43,787][07115] Updated weights for policy 0, policy_version 56382 (0.0009) -[2024-07-05 14:40:43,915][03359] Fps is (10 sec: 38491.7, 60 sec: 37681.5, 300 sec: 37377.4). Total num frames: 441876480. Throughput: 0: 9453.2. Samples: 10463376. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:43,916][03359] Avg episode reward: [(0, '53.705')] -[2024-07-05 14:40:45,918][07115] Updated weights for policy 0, policy_version 56392 (0.0009) -[2024-07-05 14:40:48,067][07115] Updated weights for policy 0, policy_version 56402 (0.0012) -[2024-07-05 14:40:48,913][03359] Fps is (10 sec: 37681.3, 60 sec: 37682.9, 300 sec: 37377.7). Total num frames: 442064896. Throughput: 0: 9465.3. Samples: 10492284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:48,914][03359] Avg episode reward: [(0, '55.936')] -[2024-07-05 14:40:50,174][07115] Updated weights for policy 0, policy_version 56412 (0.0012) -[2024-07-05 14:40:52,296][07115] Updated weights for policy 0, policy_version 56422 (0.0009) -[2024-07-05 14:40:53,921][03359] Fps is (10 sec: 38485.6, 60 sec: 37815.2, 300 sec: 37404.6). Total num frames: 442261504. Throughput: 0: 9492.1. Samples: 10549352. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:53,922][03359] Avg episode reward: [(0, '51.880')] -[2024-07-05 14:40:54,398][07115] Updated weights for policy 0, policy_version 56432 (0.0009) -[2024-07-05 14:40:56,574][07115] Updated weights for policy 0, policy_version 56442 (0.0014) -[2024-07-05 14:40:58,769][07115] Updated weights for policy 0, policy_version 56452 (0.0009) -[2024-07-05 14:40:58,917][03359] Fps is (10 sec: 38484.8, 60 sec: 37816.5, 300 sec: 37432.6). Total num frames: 442449920. Throughput: 0: 9545.3. Samples: 10607016. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:40:58,919][03359] Avg episode reward: [(0, '53.935')] -[2024-07-05 14:41:00,911][07115] Updated weights for policy 0, policy_version 56462 (0.0011) -[2024-07-05 14:41:03,040][07115] Updated weights for policy 0, policy_version 56472 (0.0008) -[2024-07-05 14:41:03,912][03359] Fps is (10 sec: 38529.9, 60 sec: 38093.0, 300 sec: 37461.8). Total num frames: 442646528. Throughput: 0: 9571.7. Samples: 10635684. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:03,913][03359] Avg episode reward: [(0, '54.317')] -[2024-07-05 14:41:05,237][07115] Updated weights for policy 0, policy_version 56482 (0.0008) -[2024-07-05 14:41:07,347][07115] Updated weights for policy 0, policy_version 56492 (0.0013) -[2024-07-05 14:41:08,912][03359] Fps is (10 sec: 38521.4, 60 sec: 38092.8, 300 sec: 37461.0). Total num frames: 442834944. Throughput: 0: 9570.1. Samples: 10693140. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:08,914][03359] Avg episode reward: [(0, '54.564')] -[2024-07-05 14:41:09,431][07115] Updated weights for policy 0, policy_version 56502 (0.0012) -[2024-07-05 14:41:11,555][07115] Updated weights for policy 0, policy_version 56512 (0.0009) -[2024-07-05 14:41:13,715][07115] Updated weights for policy 0, policy_version 56522 (0.0009) -[2024-07-05 14:41:13,912][03359] Fps is (10 sec: 37682.9, 60 sec: 38229.2, 300 sec: 37461.0). Total num frames: 443023360. Throughput: 0: 9572.2. Samples: 10750468. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:13,913][03359] Avg episode reward: [(0, '52.518')] -[2024-07-05 14:41:15,827][07115] Updated weights for policy 0, policy_version 56532 (0.0008) -[2024-07-05 14:41:17,964][07115] Updated weights for policy 0, policy_version 56542 (0.0011) -[2024-07-05 14:41:18,912][03359] Fps is (10 sec: 38502.9, 60 sec: 38365.9, 300 sec: 37488.8). Total num frames: 443219968. Throughput: 0: 9573.0. Samples: 10779256. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:18,914][03359] Avg episode reward: [(0, '52.271')] -[2024-07-05 14:41:20,137][07115] Updated weights for policy 0, policy_version 56552 (0.0011) -[2024-07-05 14:41:22,257][07115] Updated weights for policy 0, policy_version 56562 (0.0008) -[2024-07-05 14:41:23,912][03359] Fps is (10 sec: 38502.9, 60 sec: 38229.5, 300 sec: 37488.8). Total num frames: 443408384. Throughput: 0: 9570.0. Samples: 10836888. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:23,913][03359] Avg episode reward: [(0, '53.092')] -[2024-07-05 14:41:24,406][07115] Updated weights for policy 0, policy_version 56572 (0.0010) -[2024-07-05 14:41:26,532][07115] Updated weights for policy 0, policy_version 56582 (0.0009) -[2024-07-05 14:41:28,627][07115] Updated weights for policy 0, policy_version 56592 (0.0009) -[2024-07-05 14:41:28,912][03359] Fps is (10 sec: 38502.6, 60 sec: 38365.9, 300 sec: 37516.6). Total num frames: 443604992. Throughput: 0: 9584.9. Samples: 10894668. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:28,913][03359] Avg episode reward: [(0, '54.794')] -[2024-07-05 14:41:30,756][07115] Updated weights for policy 0, policy_version 56602 (0.0009) -[2024-07-05 14:41:32,924][07115] Updated weights for policy 0, policy_version 56612 (0.0011) -[2024-07-05 14:41:33,918][03359] Fps is (10 sec: 38478.6, 60 sec: 38361.9, 300 sec: 37543.6). Total num frames: 443793408. Throughput: 0: 9576.8. Samples: 10923296. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:41:33,920][03359] Avg episode reward: [(0, '52.388')] -[2024-07-05 14:41:35,030][07115] Updated weights for policy 0, policy_version 56622 (0.0009) -[2024-07-05 14:41:37,172][07115] Updated weights for policy 0, policy_version 56632 (0.0011) -[2024-07-05 14:41:38,912][03359] Fps is (10 sec: 38501.9, 60 sec: 38365.8, 300 sec: 37572.1). Total num frames: 443990016. Throughput: 0: 9597.2. Samples: 10981160. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:41:38,914][03359] Avg episode reward: [(0, '55.154')] -[2024-07-05 14:41:39,333][07115] Updated weights for policy 0, policy_version 56642 (0.0010) -[2024-07-05 14:41:41,450][07115] Updated weights for policy 0, policy_version 56652 (0.0009) -[2024-07-05 14:41:43,671][07115] Updated weights for policy 0, policy_version 56662 (0.0012) -[2024-07-05 14:41:43,912][03359] Fps is (10 sec: 38526.2, 60 sec: 38367.6, 300 sec: 37572.2). Total num frames: 444178432. Throughput: 0: 9570.2. Samples: 11037628. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:41:43,913][03359] Avg episode reward: [(0, '53.433')] -[2024-07-05 14:41:45,841][07115] Updated weights for policy 0, policy_version 56672 (0.0011) -[2024-07-05 14:41:47,968][07115] Updated weights for policy 0, policy_version 56682 (0.0012) -[2024-07-05 14:41:48,912][03359] Fps is (10 sec: 37683.1, 60 sec: 38366.1, 300 sec: 37572.1). Total num frames: 444366848. Throughput: 0: 9561.0. Samples: 11065932. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:41:48,913][03359] Avg episode reward: [(0, '56.163')] -[2024-07-05 14:41:48,918][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000056686_444366848.pth... -[2024-07-05 14:41:49,002][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000055584_435339264.pth -[2024-07-05 14:41:50,155][07115] Updated weights for policy 0, policy_version 56692 (0.0012) -[2024-07-05 14:41:52,407][07115] Updated weights for policy 0, policy_version 56702 (0.0017) -[2024-07-05 14:41:53,912][03359] Fps is (10 sec: 36863.8, 60 sec: 38097.3, 300 sec: 37544.4). Total num frames: 444547072. Throughput: 0: 9529.9. Samples: 11121984. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:41:53,913][03359] Avg episode reward: [(0, '51.373')] -[2024-07-05 14:41:54,633][07115] Updated weights for policy 0, policy_version 56712 (0.0010) -[2024-07-05 14:41:56,821][07115] Updated weights for policy 0, policy_version 56722 (0.0011) -[2024-07-05 14:41:58,912][03359] Fps is (10 sec: 36864.4, 60 sec: 38096.0, 300 sec: 37544.4). Total num frames: 444735488. Throughput: 0: 9492.9. Samples: 11177648. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:41:58,913][03359] Avg episode reward: [(0, '54.902')] -[2024-07-05 14:41:59,095][07115] Updated weights for policy 0, policy_version 56732 (0.0011) -[2024-07-05 14:42:01,207][07115] Updated weights for policy 0, policy_version 56742 (0.0012) -[2024-07-05 14:42:03,427][07115] Updated weights for policy 0, policy_version 56752 (0.0013) -[2024-07-05 14:42:03,912][03359] Fps is (10 sec: 37683.3, 60 sec: 37956.3, 300 sec: 37544.4). Total num frames: 444923904. Throughput: 0: 9474.5. Samples: 11205608. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:42:03,913][03359] Avg episode reward: [(0, '50.852')] -[2024-07-05 14:42:05,659][07115] Updated weights for policy 0, policy_version 56762 (0.0009) -[2024-07-05 14:42:07,842][07115] Updated weights for policy 0, policy_version 56772 (0.0009) -[2024-07-05 14:42:08,919][03359] Fps is (10 sec: 36843.4, 60 sec: 37816.3, 300 sec: 37544.0). Total num frames: 445104128. Throughput: 0: 9436.4. Samples: 11261580. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 14:42:08,936][03359] Avg episode reward: [(0, '53.007')] -[2024-07-05 14:42:10,063][07115] Updated weights for policy 0, policy_version 56782 (0.0012) -[2024-07-05 14:42:12,242][07115] Updated weights for policy 0, policy_version 56792 (0.0009) -[2024-07-05 14:42:13,912][03359] Fps is (10 sec: 36863.9, 60 sec: 37819.8, 300 sec: 37544.4). Total num frames: 445292544. Throughput: 0: 9399.5. Samples: 11317644. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:13,913][03359] Avg episode reward: [(0, '54.487')] -[2024-07-05 14:42:14,480][07115] Updated weights for policy 0, policy_version 56802 (0.0009) -[2024-07-05 14:42:16,655][07115] Updated weights for policy 0, policy_version 56812 (0.0009) -[2024-07-05 14:42:18,877][07115] Updated weights for policy 0, policy_version 56822 (0.0016) -[2024-07-05 14:42:18,912][03359] Fps is (10 sec: 37704.3, 60 sec: 37683.2, 300 sec: 37544.4). Total num frames: 445480960. Throughput: 0: 9369.7. Samples: 11344876. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:18,913][03359] Avg episode reward: [(0, '51.846')] -[2024-07-05 14:42:21,128][07115] Updated weights for policy 0, policy_version 56832 (0.0010) -[2024-07-05 14:42:23,322][07115] Updated weights for policy 0, policy_version 56842 (0.0010) -[2024-07-05 14:42:23,916][03359] Fps is (10 sec: 36848.5, 60 sec: 37544.0, 300 sec: 37516.0). Total num frames: 445661184. Throughput: 0: 9322.8. Samples: 11400724. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:23,918][03359] Avg episode reward: [(0, '55.022')] -[2024-07-05 14:42:25,513][07115] Updated weights for policy 0, policy_version 56852 (0.0013) -[2024-07-05 14:42:27,770][07115] Updated weights for policy 0, policy_version 56862 (0.0012) -[2024-07-05 14:42:28,912][03359] Fps is (10 sec: 36864.0, 60 sec: 37410.1, 300 sec: 37516.6). Total num frames: 445849600. Throughput: 0: 9287.3. Samples: 11455556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:28,913][03359] Avg episode reward: [(0, '53.083')] -[2024-07-05 14:42:30,036][07115] Updated weights for policy 0, policy_version 56872 (0.0009) -[2024-07-05 14:42:32,356][07115] Updated weights for policy 0, policy_version 56882 (0.0011) -[2024-07-05 14:42:33,912][03359] Fps is (10 sec: 36878.8, 60 sec: 37277.3, 300 sec: 37488.8). Total num frames: 446029824. Throughput: 0: 9260.2. Samples: 11482640. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:33,913][03359] Avg episode reward: [(0, '53.604')] -[2024-07-05 14:42:34,656][07115] Updated weights for policy 0, policy_version 56892 (0.0011) -[2024-07-05 14:42:36,975][07115] Updated weights for policy 0, policy_version 56902 (0.0011) -[2024-07-05 14:42:38,913][03359] Fps is (10 sec: 35224.0, 60 sec: 36863.8, 300 sec: 37461.7). Total num frames: 446201856. Throughput: 0: 9206.5. Samples: 11536280. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:38,914][03359] Avg episode reward: [(0, '54.479')] -[2024-07-05 14:42:39,231][07115] Updated weights for policy 0, policy_version 56912 (0.0010) -[2024-07-05 14:42:41,494][07115] Updated weights for policy 0, policy_version 56922 (0.0010) -[2024-07-05 14:42:43,687][07115] Updated weights for policy 0, policy_version 56932 (0.0013) -[2024-07-05 14:42:43,912][03359] Fps is (10 sec: 35226.5, 60 sec: 36727.5, 300 sec: 37433.3). Total num frames: 446382080. Throughput: 0: 9169.4. Samples: 11590272. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:42:43,913][03359] Avg episode reward: [(0, '54.642')] -[2024-07-05 14:42:45,848][07115] Updated weights for policy 0, policy_version 56942 (0.0014) -[2024-07-05 14:42:48,093][07115] Updated weights for policy 0, policy_version 56952 (0.0012) -[2024-07-05 14:42:48,912][03359] Fps is (10 sec: 36865.6, 60 sec: 36727.5, 300 sec: 37433.3). Total num frames: 446570496. Throughput: 0: 9171.2. Samples: 11618312. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:42:48,913][03359] Avg episode reward: [(0, '54.937')] -[2024-07-05 14:42:50,318][07115] Updated weights for policy 0, policy_version 56962 (0.0010) -[2024-07-05 14:42:52,606][07115] Updated weights for policy 0, policy_version 56972 (0.0011) -[2024-07-05 14:42:53,921][03359] Fps is (10 sec: 36830.3, 60 sec: 36721.9, 300 sec: 37432.1). Total num frames: 446750720. Throughput: 0: 9161.6. Samples: 11673884. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:42:53,923][03359] Avg episode reward: [(0, '53.031')] -[2024-07-05 14:42:54,814][07115] Updated weights for policy 0, policy_version 56982 (0.0010) -[2024-07-05 14:42:56,997][07115] Updated weights for policy 0, policy_version 56992 (0.0009) -[2024-07-05 14:42:58,912][03359] Fps is (10 sec: 36864.3, 60 sec: 36727.5, 300 sec: 37433.3). Total num frames: 446939136. Throughput: 0: 9149.4. Samples: 11729368. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:42:58,913][03359] Avg episode reward: [(0, '51.654')] -[2024-07-05 14:42:59,294][07115] Updated weights for policy 0, policy_version 57002 (0.0010) -[2024-07-05 14:43:01,723][07115] Updated weights for policy 0, policy_version 57012 (0.0010) -[2024-07-05 14:43:03,912][03359] Fps is (10 sec: 35257.4, 60 sec: 36317.8, 300 sec: 37350.0). Total num frames: 447102976. Throughput: 0: 9109.2. Samples: 11754792. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:43:03,914][03359] Avg episode reward: [(0, '52.420')] -[2024-07-05 14:43:04,280][07115] Updated weights for policy 0, policy_version 57022 (0.0011) -[2024-07-05 14:43:07,152][07115] Updated weights for policy 0, policy_version 57032 (0.0019) -[2024-07-05 14:43:08,913][03359] Fps is (10 sec: 30309.6, 60 sec: 35638.4, 300 sec: 37211.1). Total num frames: 447242240. Throughput: 0: 8870.1. Samples: 11799844. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:43:08,915][03359] Avg episode reward: [(0, '53.429')] -[2024-07-05 14:43:10,309][07115] Updated weights for policy 0, policy_version 57042 (0.0023) -[2024-07-05 14:43:13,924][03359] Fps is (10 sec: 18823.5, 60 sec: 33308.8, 300 sec: 36737.8). Total num frames: 447291392. Throughput: 0: 8153.3. Samples: 11822532. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:43:13,977][03359] Avg episode reward: [(0, '53.706')] -[2024-07-05 14:43:18,926][03359] Fps is (10 sec: 5727.8, 60 sec: 30304.5, 300 sec: 36126.7). Total num frames: 447299584. Throughput: 0: 7562.2. Samples: 11823028. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:43:18,997][03359] Avg episode reward: [(0, '53.879')] -[2024-07-05 14:43:23,922][03359] Fps is (10 sec: 819.4, 60 sec: 27305.1, 300 sec: 35488.5). Total num frames: 447299584. Throughput: 0: 6394.2. Samples: 11824064. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:43:23,981][03359] Avg episode reward: [(0, '54.347')] -[2024-07-05 14:43:28,923][03359] Fps is (10 sec: 0.0, 60 sec: 24162.8, 300 sec: 34877.5). Total num frames: 447299584. Throughput: 0: 5218.5. Samples: 11825152. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:43:28,983][03359] Avg episode reward: [(0, '54.357')] -[2024-07-05 14:43:33,920][03359] Fps is (10 sec: 819.3, 60 sec: 21297.0, 300 sec: 34266.8). Total num frames: 447307776. Throughput: 0: 4609.7. Samples: 11825776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:43:33,975][03359] Avg episode reward: [(0, '54.821')] -[2024-07-05 14:43:38,923][03359] Fps is (10 sec: 819.2, 60 sec: 18429.5, 300 sec: 33627.9). Total num frames: 447307776. Throughput: 0: 3397.6. Samples: 11826776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:43:38,983][03359] Avg episode reward: [(0, '54.860')] -[2024-07-05 14:43:43,930][03359] Fps is (10 sec: 818.9, 60 sec: 15562.2, 300 sec: 33044.6). Total num frames: 447315968. Throughput: 0: 2188.9. Samples: 11827892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:43:43,983][03359] Avg episode reward: [(0, '54.937')] -[2024-07-05 14:43:48,924][03359] Fps is (10 sec: 819.1, 60 sec: 12422.5, 300 sec: 32405.9). Total num frames: 447315968. Throughput: 0: 1638.0. Samples: 11828516. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:43:48,982][03359] Avg episode reward: [(0, '54.771')] -[2024-07-05 14:43:49,455][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057046_447315968.pth... -[2024-07-05 14:43:52,946][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000056129_439803904.pth -[2024-07-05 14:43:53,922][03359] Fps is (10 sec: 0.0, 60 sec: 9421.1, 300 sec: 31767.5). Total num frames: 447315968. Throughput: 0: 664.5. Samples: 11829752. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:43:53,961][03359] Avg episode reward: [(0, '54.781')] -[2024-07-05 14:43:58,922][03359] Fps is (10 sec: 819.4, 60 sec: 6416.3, 300 sec: 31184.4). Total num frames: 447324160. Throughput: 0: 182.1. Samples: 11830724. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:43:58,968][03359] Avg episode reward: [(0, '54.821')] -[2024-07-05 14:44:03,923][03359] Fps is (10 sec: 1638.2, 60 sec: 3822.4, 300 sec: 30573.9). Total num frames: 447332352. Throughput: 0: 182.7. Samples: 11831248. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:03,992][03359] Avg episode reward: [(0, '54.606')] -[2024-07-05 14:44:08,922][03359] Fps is (10 sec: 819.1, 60 sec: 1501.7, 300 sec: 29934.6). Total num frames: 447332352. Throughput: 0: 186.3. Samples: 11832448. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:09,003][03359] Avg episode reward: [(0, '54.750')] -[2024-07-05 14:44:10,444][07115] Updated weights for policy 0, policy_version 57052 (0.0286) -[2024-07-05 14:44:13,485][07115] Updated weights for policy 0, policy_version 57062 (0.0016) -[2024-07-05 14:44:13,912][03359] Fps is (10 sec: 12298.1, 60 sec: 2731.1, 300 sec: 29713.4). Total num frames: 447455232. Throughput: 0: 749.2. Samples: 11858860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:13,914][03359] Avg episode reward: [(0, '53.114')] -[2024-07-05 14:44:16,281][07115] Updated weights for policy 0, policy_version 57072 (0.0018) -[2024-07-05 14:44:18,912][03359] Fps is (10 sec: 27056.6, 60 sec: 5052.7, 300 sec: 29602.3). Total num frames: 447602688. Throughput: 0: 1217.2. Samples: 11880544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:18,914][03359] Avg episode reward: [(0, '51.558')] -[2024-07-05 14:44:18,924][07115] Updated weights for policy 0, policy_version 57082 (0.0010) -[2024-07-05 14:44:21,330][07115] Updated weights for policy 0, policy_version 57092 (0.0009) -[2024-07-05 14:44:23,461][07115] Updated weights for policy 0, policy_version 57102 (0.0009) -[2024-07-05 14:44:23,912][03359] Fps is (10 sec: 33587.6, 60 sec: 8193.1, 300 sec: 29602.3). Total num frames: 447791104. Throughput: 0: 2323.0. Samples: 11931292. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:23,913][03359] Avg episode reward: [(0, '52.986')] -[2024-07-05 14:44:25,634][07115] Updated weights for policy 0, policy_version 57112 (0.0013) -[2024-07-05 14:44:27,741][07115] Updated weights for policy 0, policy_version 57122 (0.0008) -[2024-07-05 14:44:28,912][03359] Fps is (10 sec: 37683.2, 60 sec: 11334.0, 300 sec: 29602.3). Total num frames: 447979520. Throughput: 0: 3577.0. Samples: 11988820. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:28,913][03359] Avg episode reward: [(0, '51.734')] -[2024-07-05 14:44:29,881][07115] Updated weights for policy 0, policy_version 57132 (0.0009) -[2024-07-05 14:44:32,087][07115] Updated weights for policy 0, policy_version 57142 (0.0010) -[2024-07-05 14:44:33,912][03359] Fps is (10 sec: 37683.1, 60 sec: 14337.5, 300 sec: 29602.3). Total num frames: 448167936. Throughput: 0: 4188.0. Samples: 12016936. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:33,913][03359] Avg episode reward: [(0, '54.819')] -[2024-07-05 14:44:34,305][07115] Updated weights for policy 0, policy_version 57152 (0.0009) -[2024-07-05 14:44:36,503][07115] Updated weights for policy 0, policy_version 57162 (0.0013) -[2024-07-05 14:44:38,750][07115] Updated weights for policy 0, policy_version 57172 (0.0009) -[2024-07-05 14:44:38,913][03359] Fps is (10 sec: 36862.4, 60 sec: 17342.1, 300 sec: 29602.2). Total num frames: 448348160. Throughput: 0: 5395.6. Samples: 12072520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:38,913][03359] Avg episode reward: [(0, '53.671')] -[2024-07-05 14:44:40,944][07115] Updated weights for policy 0, policy_version 57182 (0.0010) -[2024-07-05 14:44:43,270][07115] Updated weights for policy 0, policy_version 57192 (0.0010) -[2024-07-05 14:44:43,912][03359] Fps is (10 sec: 36044.6, 60 sec: 20210.3, 300 sec: 29574.5). Total num frames: 448528384. Throughput: 0: 6578.2. Samples: 12126696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:43,913][03359] Avg episode reward: [(0, '55.418')] -[2024-07-05 14:44:45,712][07115] Updated weights for policy 0, policy_version 57202 (0.0010) -[2024-07-05 14:44:48,014][07115] Updated weights for policy 0, policy_version 57212 (0.0012) -[2024-07-05 14:44:48,912][03359] Fps is (10 sec: 35226.9, 60 sec: 23077.9, 300 sec: 29519.0). Total num frames: 448700416. Throughput: 0: 7141.9. Samples: 12152576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:48,914][03359] Avg episode reward: [(0, '55.295')] -[2024-07-05 14:44:50,329][07115] Updated weights for policy 0, policy_version 57222 (0.0010) -[2024-07-05 14:44:52,600][07115] Updated weights for policy 0, policy_version 57232 (0.0010) -[2024-07-05 14:44:53,912][03359] Fps is (10 sec: 35225.2, 60 sec: 26080.9, 300 sec: 29491.2). Total num frames: 448880640. Throughput: 0: 8306.9. Samples: 12206188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:53,914][03359] Avg episode reward: [(0, '53.810')] -[2024-07-05 14:44:54,916][07115] Updated weights for policy 0, policy_version 57242 (0.0010) -[2024-07-05 14:44:57,144][07115] Updated weights for policy 0, policy_version 57252 (0.0009) -[2024-07-05 14:44:58,912][03359] Fps is (10 sec: 36864.3, 60 sec: 29085.0, 300 sec: 29519.0). Total num frames: 449069056. Throughput: 0: 8925.8. Samples: 12260520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:44:58,913][03359] Avg episode reward: [(0, '54.537')] -[2024-07-05 14:44:59,348][07115] Updated weights for policy 0, policy_version 57262 (0.0012) -[2024-07-05 14:45:01,512][07115] Updated weights for policy 0, policy_version 57272 (0.0010) -[2024-07-05 14:45:03,699][07115] Updated weights for policy 0, policy_version 57282 (0.0011) -[2024-07-05 14:45:03,912][03359] Fps is (10 sec: 36863.8, 60 sec: 31953.1, 300 sec: 29491.2). Total num frames: 449249280. Throughput: 0: 9070.9. Samples: 12288736. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:45:03,914][03359] Avg episode reward: [(0, '56.175')] -[2024-07-05 14:45:05,879][07115] Updated weights for policy 0, policy_version 57292 (0.0010) -[2024-07-05 14:45:08,119][07115] Updated weights for policy 0, policy_version 57302 (0.0011) -[2024-07-05 14:45:08,912][03359] Fps is (10 sec: 36863.6, 60 sec: 35094.0, 300 sec: 29518.9). Total num frames: 449437696. Throughput: 0: 9166.5. Samples: 12343788. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:45:08,914][03359] Avg episode reward: [(0, '55.761')] -[2024-07-05 14:45:10,368][07115] Updated weights for policy 0, policy_version 57312 (0.0010) -[2024-07-05 14:45:12,587][07115] Updated weights for policy 0, policy_version 57322 (0.0009) -[2024-07-05 14:45:13,912][03359] Fps is (10 sec: 37684.0, 60 sec: 36181.4, 300 sec: 29519.0). Total num frames: 449626112. Throughput: 0: 9127.9. Samples: 12399576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:45:13,913][03359] Avg episode reward: [(0, '54.419')] -[2024-07-05 14:45:14,776][07115] Updated weights for policy 0, policy_version 57332 (0.0012) -[2024-07-05 14:45:16,995][07115] Updated weights for policy 0, policy_version 57342 (0.0009) -[2024-07-05 14:45:18,914][03359] Fps is (10 sec: 36858.4, 60 sec: 36726.5, 300 sec: 29463.3). Total num frames: 449806336. Throughput: 0: 9124.3. Samples: 12427544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:45:18,915][03359] Avg episode reward: [(0, '53.002')] -[2024-07-05 14:45:19,215][07115] Updated weights for policy 0, policy_version 57352 (0.0009) -[2024-07-05 14:45:21,407][07115] Updated weights for policy 0, policy_version 57362 (0.0011) -[2024-07-05 14:45:23,601][07115] Updated weights for policy 0, policy_version 57372 (0.0009) -[2024-07-05 14:45:23,912][03359] Fps is (10 sec: 36863.9, 60 sec: 36727.4, 300 sec: 29463.4). Total num frames: 449994752. Throughput: 0: 9134.7. Samples: 12483576. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:45:23,913][03359] Avg episode reward: [(0, '55.067')] -[2024-07-05 14:45:24,204][07095] Stopping Batcher_0... -[2024-07-05 14:45:24,205][07095] Loop batcher_evt_loop terminating... -[2024-07-05 14:45:24,204][03359] Component Batcher_0 stopped! -[2024-07-05 14:45:24,206][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057375_450011136.pth... -[2024-07-05 14:45:24,233][07145] Stopping RolloutWorker_w14... -[2024-07-05 14:45:24,232][03359] Component RolloutWorker_w14 stopped! -[2024-07-05 14:45:24,234][07145] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 14:45:24,235][07117] Stopping RolloutWorker_w1... -[2024-07-05 14:45:24,235][07117] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 14:45:24,236][07123] Stopping RolloutWorker_w4... -[2024-07-05 14:45:24,237][07123] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 14:45:24,235][03359] Component RolloutWorker_w1 stopped! -[2024-07-05 14:45:24,237][07146] Stopping RolloutWorker_w15... -[2024-07-05 14:45:24,238][07146] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 14:45:24,238][03359] Component RolloutWorker_w4 stopped! -[2024-07-05 14:45:24,239][07121] Stopping RolloutWorker_w5... -[2024-07-05 14:45:24,239][07118] Stopping RolloutWorker_w3... -[2024-07-05 14:45:24,239][07116] Stopping RolloutWorker_w0... -[2024-07-05 14:45:24,240][07121] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 14:45:24,240][07116] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 14:45:24,240][07118] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 14:45:24,239][03359] Component RolloutWorker_w15 stopped! -[2024-07-05 14:45:24,242][03359] Component RolloutWorker_w5 stopped! -[2024-07-05 14:45:24,243][07120] Stopping RolloutWorker_w6... -[2024-07-05 14:45:24,243][03359] Component RolloutWorker_w3 stopped! -[2024-07-05 14:45:24,243][07120] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 14:45:24,244][07143] Stopping RolloutWorker_w11... -[2024-07-05 14:45:24,245][07143] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 14:45:24,245][07119] Stopping RolloutWorker_w2... -[2024-07-05 14:45:24,244][03359] Component RolloutWorker_w0 stopped! -[2024-07-05 14:45:24,246][07119] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 14:45:24,246][07144] Stopping RolloutWorker_w13... -[2024-07-05 14:45:24,247][07144] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 14:45:24,246][03359] Component RolloutWorker_w6 stopped! -[2024-07-05 14:45:24,247][03359] Component RolloutWorker_w11 stopped! -[2024-07-05 14:45:24,248][07142] Stopping RolloutWorker_w12... -[2024-07-05 14:45:24,249][07142] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 14:45:24,248][03359] Component RolloutWorker_w2 stopped! -[2024-07-05 14:45:24,249][03359] Component RolloutWorker_w13 stopped! -[2024-07-05 14:45:24,253][03359] Component RolloutWorker_w12 stopped! -[2024-07-05 14:45:24,256][07125] Stopping RolloutWorker_w10... -[2024-07-05 14:45:24,256][03359] Component RolloutWorker_w10 stopped! -[2024-07-05 14:45:24,258][07125] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 14:45:24,264][07124] Stopping RolloutWorker_w7... -[2024-07-05 14:45:24,264][07124] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 14:45:24,264][03359] Component RolloutWorker_w7 stopped! -[2024-07-05 14:45:24,267][07126] Stopping RolloutWorker_w9... -[2024-07-05 14:45:24,267][03359] Component RolloutWorker_w9 stopped! -[2024-07-05 14:45:24,268][07126] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 14:45:24,269][07115] Weights refcount: 2 0 -[2024-07-05 14:45:24,270][07115] Stopping InferenceWorker_p0-w0... -[2024-07-05 14:45:24,271][07115] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 14:45:24,271][03359] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 14:45:24,283][03359] Component RolloutWorker_w8 stopped! -[2024-07-05 14:45:24,282][07122] Stopping RolloutWorker_w8... -[2024-07-05 14:45:24,292][07122] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 14:45:24,347][07095] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000056686_444366848.pth -[2024-07-05 14:45:24,359][07095] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057375_450011136.pth... -[2024-07-05 14:45:24,497][07095] Stopping LearnerWorker_p0... -[2024-07-05 14:45:24,498][07095] Loop learner_proc0_evt_loop terminating... -[2024-07-05 14:45:24,497][03359] Component LearnerWorker_p0 stopped! -[2024-07-05 14:45:24,498][03359] Waiting for process learner_proc0 to stop... -[2024-07-05 14:45:25,798][03359] Waiting for process inference_proc0-0 to join... -[2024-07-05 14:45:25,799][03359] Waiting for process rollout_proc0 to join... -[2024-07-05 14:45:25,799][03359] Waiting for process rollout_proc1 to join... -[2024-07-05 14:45:25,800][03359] Waiting for process rollout_proc2 to join... -[2024-07-05 14:45:25,800][03359] Waiting for process rollout_proc3 to join... -[2024-07-05 14:45:25,801][03359] Waiting for process rollout_proc4 to join... -[2024-07-05 14:45:25,801][03359] Waiting for process rollout_proc5 to join... -[2024-07-05 14:45:25,802][03359] Waiting for process rollout_proc6 to join... -[2024-07-05 14:45:25,802][03359] Waiting for process rollout_proc7 to join... -[2024-07-05 14:45:25,802][03359] Waiting for process rollout_proc8 to join... -[2024-07-05 14:45:25,803][03359] Waiting for process rollout_proc9 to join... -[2024-07-05 14:45:25,803][03359] Waiting for process rollout_proc10 to join... -[2024-07-05 14:45:25,803][03359] Waiting for process rollout_proc11 to join... -[2024-07-05 14:45:25,804][03359] Waiting for process rollout_proc12 to join... -[2024-07-05 14:45:25,804][03359] Waiting for process rollout_proc13 to join... -[2024-07-05 14:45:25,804][03359] Waiting for process rollout_proc14 to join... -[2024-07-05 14:45:25,804][03359] Waiting for process rollout_proc15 to join... -[2024-07-05 14:45:25,805][03359] Batcher 0 profile tree view: -batching: 122.2405, releasing_batches: 0.1626 -[2024-07-05 14:45:25,805][03359] InferenceWorker_p0-w0 profile tree view: +[2024-07-05 10:44:19,253][22225] Using optimizer +[2024-07-05 10:44:19,778][22225] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-07-05 10:44:19,812][22225] Loading model from checkpoint +[2024-07-05 10:44:19,814][22225] Loaded experiment state at self.train_step=2443, self.env_steps=10006528 +[2024-07-05 10:44:19,814][22225] Initialized policy 0 weights for model version 2443 +[2024-07-05 10:44:19,815][22225] LearnerWorker_p0 finished initialization! +[2024-07-05 10:44:19,815][22225] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 10:44:19,874][22239] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 10:44:19,877][22239] RunningMeanStd input shape: (1,) +[2024-07-05 10:44:19,885][22239] Num input channels: 3 +[2024-07-05 10:44:19,896][22239] Convolutional layer output size: 4608 +[2024-07-05 10:44:19,907][22239] Policy head output size: 512 +[2024-07-05 10:44:20,034][17621] Inference worker 0-0 is ready! +[2024-07-05 10:44:20,035][17621] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 10:44:20,063][22245] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22238] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22246] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22243] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22241] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22242] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22240] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,063][22244] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 10:44:20,544][22238] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,545][22241] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,545][22243] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,546][22245] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,547][22246] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,547][22242] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,548][22240] Decorrelating experience for 0 frames... +[2024-07-05 10:44:20,696][22238] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,697][22242] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,697][22245] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,697][22246] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,761][22243] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,862][22241] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,864][22240] Decorrelating experience for 32 frames... +[2024-07-05 10:44:20,901][22238] Decorrelating experience for 64 frames... +[2024-07-05 10:44:20,903][22242] Decorrelating experience for 64 frames... +[2024-07-05 10:44:20,914][22245] Decorrelating experience for 64 frames... +[2024-07-05 10:44:20,964][22243] Decorrelating experience for 64 frames... +[2024-07-05 10:44:21,060][22244] Decorrelating experience for 0 frames... +[2024-07-05 10:44:21,060][22246] Decorrelating experience for 64 frames... +[2024-07-05 10:44:21,068][22241] Decorrelating experience for 64 frames... +[2024-07-05 10:44:21,074][22240] Decorrelating experience for 64 frames... +[2024-07-05 10:44:21,084][22238] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,106][22245] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,149][22243] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,238][22242] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,245][22241] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,274][22244] Decorrelating experience for 32 frames... +[2024-07-05 10:44:21,346][22246] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,370][22240] Decorrelating experience for 96 frames... +[2024-07-05 10:44:21,495][22244] Decorrelating experience for 64 frames... +[2024-07-05 10:44:21,667][22244] Decorrelating experience for 96 frames... +[2024-07-05 10:44:22,094][22225] Signal inference workers to stop experience collection... +[2024-07-05 10:44:22,100][22239] InferenceWorker_p0-w0: stopping experience collection +[2024-07-05 10:44:23,526][17621] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 10006528. Throughput: 0: nan. Samples: 2270. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 10:44:23,526][17621] Avg episode reward: [(0, '2.078')] +[2024-07-05 10:44:24,772][22225] Signal inference workers to resume experience collection... +[2024-07-05 10:44:24,772][22239] InferenceWorker_p0-w0: resuming experience collection +[2024-07-05 10:44:28,203][22239] Updated weights for policy 0, policy_version 2453 (0.0099) +[2024-07-05 10:44:28,525][17621] Fps is (10 sec: 8192.1, 60 sec: 8192.1, 300 sec: 8192.1). Total num frames: 10047488. Throughput: 0: 568.4. Samples: 5112. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-07-05 10:44:28,526][17621] Avg episode reward: [(0, '12.420')] +[2024-07-05 10:44:31,807][22239] Updated weights for policy 0, policy_version 2463 (0.0012) +[2024-07-05 10:44:33,525][17621] Fps is (10 sec: 9830.5, 60 sec: 9830.5, 300 sec: 9830.5). Total num frames: 10104832. Throughput: 0: 1968.8. Samples: 21958. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:44:33,526][17621] Avg episode reward: [(0, '25.039')] +[2024-07-05 10:44:35,424][22239] Updated weights for policy 0, policy_version 2473 (0.0012) +[2024-07-05 10:44:35,898][17621] Heartbeat connected on Batcher_0 +[2024-07-05 10:44:35,911][17621] Heartbeat connected on RolloutWorker_w0 +[2024-07-05 10:44:35,915][17621] Heartbeat connected on RolloutWorker_w1 +[2024-07-05 10:44:35,917][17621] Heartbeat connected on RolloutWorker_w2 +[2024-07-05 10:44:35,920][17621] Heartbeat connected on RolloutWorker_w3 +[2024-07-05 10:44:35,923][17621] Heartbeat connected on RolloutWorker_w4 +[2024-07-05 10:44:35,924][17621] Heartbeat connected on InferenceWorker_p0-w0 +[2024-07-05 10:44:35,927][17621] Heartbeat connected on RolloutWorker_w5 +[2024-07-05 10:44:35,930][17621] Heartbeat connected on RolloutWorker_w6 +[2024-07-05 10:44:35,933][17621] Heartbeat connected on RolloutWorker_w7 +[2024-07-05 10:44:36,142][17621] Heartbeat connected on LearnerWorker_p0 +[2024-07-05 10:44:38,526][17621] Fps is (10 sec: 11468.8, 60 sec: 10376.5, 300 sec: 10376.5). Total num frames: 10162176. Throughput: 0: 2447.2. Samples: 38978. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:44:38,526][17621] Avg episode reward: [(0, '31.312')] +[2024-07-05 10:44:39,050][22239] Updated weights for policy 0, policy_version 2483 (0.0012) +[2024-07-05 10:44:42,661][22239] Updated weights for policy 0, policy_version 2493 (0.0012) +[2024-07-05 10:44:43,526][17621] Fps is (10 sec: 11468.7, 60 sec: 10649.6, 300 sec: 10649.6). Total num frames: 10219520. Throughput: 0: 2258.3. Samples: 47436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 10:44:43,526][17621] Avg episode reward: [(0, '33.632')] +[2024-07-05 10:44:46,335][22239] Updated weights for policy 0, policy_version 2503 (0.0012) +[2024-07-05 10:44:48,525][17621] Fps is (10 sec: 11468.8, 60 sec: 10813.5, 300 sec: 10813.5). Total num frames: 10276864. Throughput: 0: 2474.2. Samples: 64124. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:44:48,527][17621] Avg episode reward: [(0, '33.295')] +[2024-07-05 10:44:49,952][22239] Updated weights for policy 0, policy_version 2513 (0.0012) +[2024-07-05 10:44:53,526][17621] Fps is (10 sec: 11059.2, 60 sec: 10786.1, 300 sec: 10786.1). Total num frames: 10330112. Throughput: 0: 2636.4. Samples: 81362. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:44:53,526][17621] Avg episode reward: [(0, '34.199')] +[2024-07-05 10:44:53,556][22239] Updated weights for policy 0, policy_version 2523 (0.0012) +[2024-07-05 10:44:57,157][22239] Updated weights for policy 0, policy_version 2533 (0.0012) +[2024-07-05 10:44:58,525][17621] Fps is (10 sec: 11059.2, 60 sec: 10883.7, 300 sec: 10883.7). Total num frames: 10387456. Throughput: 0: 2500.9. Samples: 89802. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 10:44:58,526][17621] Avg episode reward: [(0, '32.141')] +[2024-07-05 10:45:00,759][22239] Updated weights for policy 0, policy_version 2543 (0.0012) +[2024-07-05 10:45:03,525][17621] Fps is (10 sec: 11468.9, 60 sec: 10956.8, 300 sec: 10956.8). Total num frames: 10444800. Throughput: 0: 2619.9. Samples: 107066. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:03,526][17621] Avg episode reward: [(0, '32.091')] +[2024-07-05 10:45:04,293][22239] Updated weights for policy 0, policy_version 2553 (0.0012) +[2024-07-05 10:45:07,830][22239] Updated weights for policy 0, policy_version 2563 (0.0012) +[2024-07-05 10:45:08,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11013.7, 300 sec: 11013.7). Total num frames: 10502144. Throughput: 0: 2715.5. Samples: 124468. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:08,526][17621] Avg episode reward: [(0, '29.199')] +[2024-07-05 10:45:11,363][22239] Updated weights for policy 0, policy_version 2573 (0.0012) +[2024-07-05 10:45:13,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11141.1, 300 sec: 11141.1). Total num frames: 10563584. Throughput: 0: 2844.1. Samples: 133098. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:13,527][17621] Avg episode reward: [(0, '32.140')] +[2024-07-05 10:45:14,919][22239] Updated weights for policy 0, policy_version 2583 (0.0012) +[2024-07-05 10:45:18,482][22239] Updated weights for policy 0, policy_version 2593 (0.0012) +[2024-07-05 10:45:18,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11170.9, 300 sec: 11170.9). Total num frames: 10620928. Throughput: 0: 2851.8. Samples: 150288. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:18,526][17621] Avg episode reward: [(0, '31.332')] +[2024-07-05 10:45:22,019][22239] Updated weights for policy 0, policy_version 2603 (0.0012) +[2024-07-05 10:45:23,526][17621] Fps is (10 sec: 11468.9, 60 sec: 11195.7, 300 sec: 11195.7). Total num frames: 10678272. Throughput: 0: 2859.4. Samples: 167652. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:23,527][17621] Avg episode reward: [(0, '31.411')] +[2024-07-05 10:45:25,579][22239] Updated weights for policy 0, policy_version 2613 (0.0012) +[2024-07-05 10:45:28,525][17621] Fps is (10 sec: 11468.9, 60 sec: 11468.8, 300 sec: 11216.8). Total num frames: 10735616. Throughput: 0: 2868.9. Samples: 176534. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:28,526][17621] Avg episode reward: [(0, '30.890')] +[2024-07-05 10:45:29,150][22239] Updated weights for policy 0, policy_version 2623 (0.0011) +[2024-07-05 10:45:32,727][22239] Updated weights for policy 0, policy_version 2633 (0.0012) +[2024-07-05 10:45:33,525][17621] Fps is (10 sec: 11468.9, 60 sec: 11468.8, 300 sec: 11234.8). Total num frames: 10792960. Throughput: 0: 2875.8. Samples: 193534. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:33,527][17621] Avg episode reward: [(0, '28.854')] +[2024-07-05 10:45:36,294][22239] Updated weights for policy 0, policy_version 2643 (0.0012) +[2024-07-05 10:45:38,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 11250.4). Total num frames: 10850304. Throughput: 0: 2874.8. Samples: 210730. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:38,526][17621] Avg episode reward: [(0, '29.227')] +[2024-07-05 10:45:39,854][22239] Updated weights for policy 0, policy_version 2653 (0.0012) +[2024-07-05 10:45:43,402][22239] Updated weights for policy 0, policy_version 2663 (0.0012) +[2024-07-05 10:45:43,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11468.8, 300 sec: 11264.0). Total num frames: 10907648. Throughput: 0: 2884.1. Samples: 219588. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 10:45:43,526][17621] Avg episode reward: [(0, '31.927')] +[2024-07-05 10:45:46,959][22239] Updated weights for policy 0, policy_version 2673 (0.0012) +[2024-07-05 10:45:48,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 11276.1). Total num frames: 10964992. Throughput: 0: 2885.6. Samples: 236918. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0) +[2024-07-05 10:45:48,527][17621] Avg episode reward: [(0, '33.178')] +[2024-07-05 10:45:50,506][22239] Updated weights for policy 0, policy_version 2683 (0.0012) +[2024-07-05 10:45:53,525][17621] Fps is (10 sec: 11468.9, 60 sec: 11537.1, 300 sec: 11286.8). Total num frames: 11022336. Throughput: 0: 2878.9. Samples: 254020. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0) +[2024-07-05 10:45:53,526][17621] Avg episode reward: [(0, '34.124')] +[2024-07-05 10:45:54,072][22239] Updated weights for policy 0, policy_version 2693 (0.0012) +[2024-07-05 10:45:57,623][22239] Updated weights for policy 0, policy_version 2703 (0.0012) +[2024-07-05 10:45:58,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 11296.3). Total num frames: 11079680. Throughput: 0: 2881.7. Samples: 262772. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0) +[2024-07-05 10:45:58,526][17621] Avg episode reward: [(0, '32.672')] +[2024-07-05 10:46:01,157][22239] Updated weights for policy 0, policy_version 2713 (0.0012) +[2024-07-05 10:46:03,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11537.0, 300 sec: 11305.0). Total num frames: 11137024. Throughput: 0: 2886.4. Samples: 280176. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:46:03,527][17621] Avg episode reward: [(0, '32.289')] +[2024-07-05 10:46:04,715][22239] Updated weights for policy 0, policy_version 2723 (0.0012) +[2024-07-05 10:46:08,248][22239] Updated weights for policy 0, policy_version 2733 (0.0012) +[2024-07-05 10:46:08,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 11312.8). Total num frames: 11194368. Throughput: 0: 2886.8. Samples: 297556. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:46:08,527][17621] Avg episode reward: [(0, '30.228')] +[2024-07-05 10:46:11,775][22239] Updated weights for policy 0, policy_version 2743 (0.0012) +[2024-07-05 10:46:13,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 11319.8). Total num frames: 11251712. Throughput: 0: 2877.8. Samples: 306034. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:46:13,527][17621] Avg episode reward: [(0, '30.845')] +[2024-07-05 10:46:13,530][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002748_11255808.pth... +[2024-07-05 10:46:13,604][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002245_9195520.pth +[2024-07-05 10:46:15,263][22239] Updated weights for policy 0, policy_version 2753 (0.0012) +[2024-07-05 10:46:18,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11537.1, 300 sec: 11362.0). Total num frames: 11313152. Throughput: 0: 2895.8. Samples: 323844. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 10:46:18,526][17621] Avg episode reward: [(0, '30.248')] +[2024-07-05 10:46:18,717][22239] Updated weights for policy 0, policy_version 2763 (0.0011) +[2024-07-05 10:46:22,151][22239] Updated weights for policy 0, policy_version 2773 (0.0011) +[2024-07-05 10:46:23,525][17621] Fps is (10 sec: 12288.2, 60 sec: 11605.4, 300 sec: 11400.5). Total num frames: 11374592. Throughput: 0: 2912.4. Samples: 341786. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:23,526][17621] Avg episode reward: [(0, '30.197')] +[2024-07-05 10:46:25,592][22239] Updated weights for policy 0, policy_version 2783 (0.0011) +[2024-07-05 10:46:28,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11605.3, 300 sec: 11403.3). Total num frames: 11431936. Throughput: 0: 2915.2. Samples: 350770. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:28,527][17621] Avg episode reward: [(0, '31.984')] +[2024-07-05 10:46:29,112][22239] Updated weights for policy 0, policy_version 2793 (0.0011) +[2024-07-05 10:46:32,617][22239] Updated weights for policy 0, policy_version 2803 (0.0011) +[2024-07-05 10:46:33,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11405.8). Total num frames: 11489280. Throughput: 0: 2917.5. Samples: 368204. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:33,527][17621] Avg episode reward: [(0, '33.702')] +[2024-07-05 10:46:36,088][22239] Updated weights for policy 0, policy_version 2813 (0.0011) +[2024-07-05 10:46:38,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11408.1). Total num frames: 11546624. Throughput: 0: 2928.5. Samples: 385804. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:38,526][17621] Avg episode reward: [(0, '33.315')] +[2024-07-05 10:46:39,595][22239] Updated weights for policy 0, policy_version 2823 (0.0012) +[2024-07-05 10:46:43,057][22239] Updated weights for policy 0, policy_version 2833 (0.0011) +[2024-07-05 10:46:43,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11439.6). Total num frames: 11608064. Throughput: 0: 2931.8. Samples: 394702. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:43,526][17621] Avg episode reward: [(0, '33.610')] +[2024-07-05 10:46:46,574][22239] Updated weights for policy 0, policy_version 2843 (0.0012) +[2024-07-05 10:46:48,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11440.6). Total num frames: 11665408. Throughput: 0: 2936.0. Samples: 412296. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:48,526][17621] Avg episode reward: [(0, '34.826')] +[2024-07-05 10:46:48,648][22225] Saving new best policy, reward=34.826! +[2024-07-05 10:46:50,058][22239] Updated weights for policy 0, policy_version 2853 (0.0012) +[2024-07-05 10:46:53,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11441.5). Total num frames: 11722752. Throughput: 0: 2939.5. Samples: 429832. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:53,526][17621] Avg episode reward: [(0, '35.397')] +[2024-07-05 10:46:53,532][22225] Saving new best policy, reward=35.397! +[2024-07-05 10:46:53,534][22239] Updated weights for policy 0, policy_version 2863 (0.0011) +[2024-07-05 10:46:57,050][22239] Updated weights for policy 0, policy_version 2873 (0.0014) +[2024-07-05 10:46:58,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11468.8). Total num frames: 11784192. Throughput: 0: 2943.9. Samples: 438510. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:46:58,526][17621] Avg episode reward: [(0, '34.461')] +[2024-07-05 10:47:00,528][22239] Updated weights for policy 0, policy_version 2883 (0.0011) +[2024-07-05 10:47:03,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11468.8). Total num frames: 11841536. Throughput: 0: 2944.3. Samples: 456338. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:47:03,526][17621] Avg episode reward: [(0, '31.910')] +[2024-07-05 10:47:03,982][22239] Updated weights for policy 0, policy_version 2893 (0.0011) +[2024-07-05 10:47:07,426][22239] Updated weights for policy 0, policy_version 2903 (0.0011) +[2024-07-05 10:47:08,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11493.6). Total num frames: 11902976. Throughput: 0: 2938.0. Samples: 473996. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:47:08,526][17621] Avg episode reward: [(0, '31.829')] +[2024-07-05 10:47:10,881][22239] Updated weights for policy 0, policy_version 2913 (0.0011) +[2024-07-05 10:47:13,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11492.9). Total num frames: 11960320. Throughput: 0: 2938.6. Samples: 483008. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:47:13,526][17621] Avg episode reward: [(0, '33.901')] +[2024-07-05 10:47:14,321][22239] Updated weights for policy 0, policy_version 2923 (0.0011) +[2024-07-05 10:47:17,780][22239] Updated weights for policy 0, policy_version 2933 (0.0011) +[2024-07-05 10:47:18,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11515.6). Total num frames: 12021760. Throughput: 0: 2943.9. Samples: 500678. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:47:18,526][17621] Avg episode reward: [(0, '35.163')] +[2024-07-05 10:47:21,228][22239] Updated weights for policy 0, policy_version 2943 (0.0011) +[2024-07-05 10:47:23,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11514.3). Total num frames: 12079104. Throughput: 0: 2955.3. Samples: 518794. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 10:47:23,527][17621] Avg episode reward: [(0, '33.913')] +[2024-07-05 10:47:24,675][22239] Updated weights for policy 0, policy_version 2953 (0.0011) +[2024-07-05 10:47:28,120][22239] Updated weights for policy 0, policy_version 2963 (0.0011) +[2024-07-05 10:47:28,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11535.2). Total num frames: 12140544. Throughput: 0: 2949.0. Samples: 527406. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:28,526][17621] Avg episode reward: [(0, '35.517')] +[2024-07-05 10:47:28,527][22225] Saving new best policy, reward=35.517! +[2024-07-05 10:47:31,580][22239] Updated weights for policy 0, policy_version 2973 (0.0011) +[2024-07-05 10:47:33,526][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11533.5). Total num frames: 12197888. Throughput: 0: 2959.5. Samples: 545472. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:33,527][17621] Avg episode reward: [(0, '34.441')] +[2024-07-05 10:47:35,028][22239] Updated weights for policy 0, policy_version 2983 (0.0011) +[2024-07-05 10:47:38,520][22239] Updated weights for policy 0, policy_version 2993 (0.0012) +[2024-07-05 10:47:38,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11878.4, 300 sec: 11552.8). Total num frames: 12259328. Throughput: 0: 2960.7. Samples: 563062. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:38,526][17621] Avg episode reward: [(0, '34.588')] +[2024-07-05 10:47:41,981][22239] Updated weights for policy 0, policy_version 3003 (0.0011) +[2024-07-05 10:47:43,526][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11550.7). Total num frames: 12316672. Throughput: 0: 2968.5. Samples: 572094. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:43,526][17621] Avg episode reward: [(0, '32.322')] +[2024-07-05 10:47:45,456][22239] Updated weights for policy 0, policy_version 3013 (0.0011) +[2024-07-05 10:47:48,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11810.1, 300 sec: 11548.7). Total num frames: 12374016. Throughput: 0: 2960.8. Samples: 589574. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:48,526][17621] Avg episode reward: [(0, '31.797')] +[2024-07-05 10:47:48,965][22239] Updated weights for policy 0, policy_version 3023 (0.0011) +[2024-07-05 10:47:52,411][22239] Updated weights for policy 0, policy_version 3033 (0.0011) +[2024-07-05 10:47:53,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11878.4, 300 sec: 11566.3). Total num frames: 12435456. Throughput: 0: 2960.5. Samples: 607218. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:53,526][17621] Avg episode reward: [(0, '31.331')] +[2024-07-05 10:47:55,869][22239] Updated weights for policy 0, policy_version 3043 (0.0011) +[2024-07-05 10:47:58,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11564.1). Total num frames: 12492800. Throughput: 0: 2960.5. Samples: 616230. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:47:58,527][17621] Avg episode reward: [(0, '33.645')] +[2024-07-05 10:47:59,338][22239] Updated weights for policy 0, policy_version 3053 (0.0011) +[2024-07-05 10:48:02,819][22239] Updated weights for policy 0, policy_version 3063 (0.0011) +[2024-07-05 10:48:03,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11810.1, 300 sec: 11561.9). Total num frames: 12550144. Throughput: 0: 2958.0. Samples: 633790. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:03,526][17621] Avg episode reward: [(0, '34.520')] +[2024-07-05 10:48:06,339][22239] Updated weights for policy 0, policy_version 3073 (0.0011) +[2024-07-05 10:48:08,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11578.0). Total num frames: 12611584. Throughput: 0: 2944.2. Samples: 651284. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:08,527][17621] Avg episode reward: [(0, '34.222')] +[2024-07-05 10:48:09,813][22239] Updated weights for policy 0, policy_version 3083 (0.0012) +[2024-07-05 10:48:13,289][22239] Updated weights for policy 0, policy_version 3093 (0.0011) +[2024-07-05 10:48:13,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11575.7). Total num frames: 12668928. Throughput: 0: 2952.7. Samples: 660278. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:13,526][17621] Avg episode reward: [(0, '34.243')] +[2024-07-05 10:48:13,633][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000003094_12673024.pth... +[2024-07-05 10:48:13,702][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002443_10006528.pth +[2024-07-05 10:48:16,759][22239] Updated weights for policy 0, policy_version 3103 (0.0011) +[2024-07-05 10:48:18,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11590.8). Total num frames: 12730368. Throughput: 0: 2942.1. Samples: 677864. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:18,526][17621] Avg episode reward: [(0, '34.706')] +[2024-07-05 10:48:20,245][22239] Updated weights for policy 0, policy_version 3113 (0.0011) +[2024-07-05 10:48:23,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11810.1, 300 sec: 11588.3). Total num frames: 12787712. Throughput: 0: 2941.9. Samples: 695450. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:23,527][17621] Avg episode reward: [(0, '34.809')] +[2024-07-05 10:48:23,730][22239] Updated weights for policy 0, policy_version 3123 (0.0011) +[2024-07-05 10:48:27,240][22239] Updated weights for policy 0, policy_version 3133 (0.0011) +[2024-07-05 10:48:28,526][17621] Fps is (10 sec: 11468.5, 60 sec: 11741.8, 300 sec: 11585.8). Total num frames: 12845056. Throughput: 0: 2939.5. Samples: 704370. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:28,526][17621] Avg episode reward: [(0, '36.109')] +[2024-07-05 10:48:28,635][22225] Saving new best policy, reward=36.109! +[2024-07-05 10:48:30,712][22239] Updated weights for policy 0, policy_version 3143 (0.0011) +[2024-07-05 10:48:33,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11810.1, 300 sec: 11599.9). Total num frames: 12906496. Throughput: 0: 2942.0. Samples: 721964. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:33,526][17621] Avg episode reward: [(0, '36.198')] +[2024-07-05 10:48:33,529][22225] Saving new best policy, reward=36.198! +[2024-07-05 10:48:34,211][22239] Updated weights for policy 0, policy_version 3153 (0.0012) +[2024-07-05 10:48:37,676][22239] Updated weights for policy 0, policy_version 3163 (0.0011) +[2024-07-05 10:48:38,526][17621] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11597.3). Total num frames: 12963840. Throughput: 0: 2941.0. Samples: 739562. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:38,527][17621] Avg episode reward: [(0, '32.251')] +[2024-07-05 10:48:41,148][22239] Updated weights for policy 0, policy_version 3173 (0.0012) +[2024-07-05 10:48:43,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11594.8). Total num frames: 13021184. Throughput: 0: 2938.5. Samples: 748462. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:43,526][17621] Avg episode reward: [(0, '30.368')] +[2024-07-05 10:48:44,637][22239] Updated weights for policy 0, policy_version 3183 (0.0011) +[2024-07-05 10:48:48,135][22239] Updated weights for policy 0, policy_version 3193 (0.0011) +[2024-07-05 10:48:48,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11810.1, 300 sec: 11607.9). Total num frames: 13082624. Throughput: 0: 2937.7. Samples: 765986. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:48,526][17621] Avg episode reward: [(0, '30.422')] +[2024-07-05 10:48:51,636][22239] Updated weights for policy 0, policy_version 3203 (0.0012) +[2024-07-05 10:48:53,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11605.3). Total num frames: 13139968. Throughput: 0: 2936.4. Samples: 783424. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:53,526][17621] Avg episode reward: [(0, '32.225')] +[2024-07-05 10:48:55,158][22239] Updated weights for policy 0, policy_version 3213 (0.0012) +[2024-07-05 10:48:58,526][17621] Fps is (10 sec: 11468.2, 60 sec: 11741.8, 300 sec: 11602.8). Total num frames: 13197312. Throughput: 0: 2933.7. Samples: 792298. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:48:58,527][17621] Avg episode reward: [(0, '33.683')] +[2024-07-05 10:48:58,761][22239] Updated weights for policy 0, policy_version 3223 (0.0012) +[2024-07-05 10:49:02,299][22239] Updated weights for policy 0, policy_version 3233 (0.0012) +[2024-07-05 10:49:03,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11600.5). Total num frames: 13254656. Throughput: 0: 2928.4. Samples: 809640. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:03,526][17621] Avg episode reward: [(0, '33.838')] +[2024-07-05 10:49:05,826][22239] Updated weights for policy 0, policy_version 3243 (0.0012) +[2024-07-05 10:49:08,526][17621] Fps is (10 sec: 11469.3, 60 sec: 11673.6, 300 sec: 11598.1). Total num frames: 13312000. Throughput: 0: 2923.7. Samples: 827016. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:08,526][17621] Avg episode reward: [(0, '30.303')] +[2024-07-05 10:49:09,350][22239] Updated weights for policy 0, policy_version 3253 (0.0012) +[2024-07-05 10:49:12,864][22239] Updated weights for policy 0, policy_version 3263 (0.0012) +[2024-07-05 10:49:13,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11595.9). Total num frames: 13369344. Throughput: 0: 2915.1. Samples: 835548. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:13,527][17621] Avg episode reward: [(0, '29.833')] +[2024-07-05 10:49:16,387][22239] Updated weights for policy 0, policy_version 3273 (0.0012) +[2024-07-05 10:49:18,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11607.7). Total num frames: 13430784. Throughput: 0: 2911.5. Samples: 852980. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:18,526][17621] Avg episode reward: [(0, '30.754')] +[2024-07-05 10:49:19,925][22239] Updated weights for policy 0, policy_version 3283 (0.0012) +[2024-07-05 10:49:23,449][22239] Updated weights for policy 0, policy_version 3293 (0.0011) +[2024-07-05 10:49:23,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 13488128. Throughput: 0: 2907.3. Samples: 870390. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:23,526][17621] Avg episode reward: [(0, '33.408')] +[2024-07-05 10:49:26,970][22239] Updated weights for policy 0, policy_version 3303 (0.0012) +[2024-07-05 10:49:28,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.7, 300 sec: 11663.2). Total num frames: 13545472. Throughput: 0: 2908.0. Samples: 879320. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:28,526][17621] Avg episode reward: [(0, '35.185')] +[2024-07-05 10:49:30,495][22239] Updated weights for policy 0, policy_version 3313 (0.0011) +[2024-07-05 10:49:33,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 13602816. Throughput: 0: 2906.5. Samples: 896780. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:33,526][17621] Avg episode reward: [(0, '34.412')] +[2024-07-05 10:49:34,017][22239] Updated weights for policy 0, policy_version 3323 (0.0012) +[2024-07-05 10:49:37,535][22239] Updated weights for policy 0, policy_version 3333 (0.0012) +[2024-07-05 10:49:38,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.4, 300 sec: 11663.2). Total num frames: 13660160. Throughput: 0: 2906.7. Samples: 914224. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:38,526][17621] Avg episode reward: [(0, '33.102')] +[2024-07-05 10:49:41,053][22239] Updated weights for policy 0, policy_version 3343 (0.0012) +[2024-07-05 10:49:43,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 13721600. Throughput: 0: 2898.7. Samples: 922738. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:43,527][17621] Avg episode reward: [(0, '34.745')] +[2024-07-05 10:49:44,569][22239] Updated weights for policy 0, policy_version 3353 (0.0011) +[2024-07-05 10:49:48,101][22239] Updated weights for policy 0, policy_version 3363 (0.0012) +[2024-07-05 10:49:48,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11605.3, 300 sec: 11691.0). Total num frames: 13778944. Throughput: 0: 2900.5. Samples: 940162. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:49:48,526][17621] Avg episode reward: [(0, '32.323')] +[2024-07-05 10:49:51,625][22239] Updated weights for policy 0, policy_version 3373 (0.0011) +[2024-07-05 10:49:53,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11691.0). Total num frames: 13836288. Throughput: 0: 2902.3. Samples: 957618. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:53,526][17621] Avg episode reward: [(0, '31.738')] +[2024-07-05 10:49:55,150][22239] Updated weights for policy 0, policy_version 3383 (0.0012) +[2024-07-05 10:49:58,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11605.4, 300 sec: 11691.0). Total num frames: 13893632. Throughput: 0: 2910.7. Samples: 966530. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:49:58,526][17621] Avg episode reward: [(0, '31.940')] +[2024-07-05 10:49:58,627][22239] Updated weights for policy 0, policy_version 3393 (0.0011) +[2024-07-05 10:50:02,120][22239] Updated weights for policy 0, policy_version 3403 (0.0012) +[2024-07-05 10:50:03,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 13955072. Throughput: 0: 2913.4. Samples: 984082. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:03,526][17621] Avg episode reward: [(0, '35.552')] +[2024-07-05 10:50:05,607][22239] Updated weights for policy 0, policy_version 3413 (0.0012) +[2024-07-05 10:50:08,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 14012416. Throughput: 0: 2913.1. Samples: 1001478. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:08,526][17621] Avg episode reward: [(0, '32.818')] +[2024-07-05 10:50:09,164][22239] Updated weights for policy 0, policy_version 3423 (0.0011) +[2024-07-05 10:50:12,690][22239] Updated weights for policy 0, policy_version 3433 (0.0012) +[2024-07-05 10:50:13,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 14069760. Throughput: 0: 2912.1. Samples: 1010366. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:13,526][17621] Avg episode reward: [(0, '28.854')] +[2024-07-05 10:50:13,743][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000003436_14073856.pth... +[2024-07-05 10:50:13,817][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000002748_11255808.pth +[2024-07-05 10:50:16,231][22239] Updated weights for policy 0, policy_version 3443 (0.0011) +[2024-07-05 10:50:18,526][17621] Fps is (10 sec: 11468.4, 60 sec: 11605.3, 300 sec: 11690.9). Total num frames: 14127104. Throughput: 0: 2911.5. Samples: 1027800. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:18,527][17621] Avg episode reward: [(0, '30.917')] +[2024-07-05 10:50:19,742][22239] Updated weights for policy 0, policy_version 3453 (0.0012) +[2024-07-05 10:50:23,271][22239] Updated weights for policy 0, policy_version 3463 (0.0012) +[2024-07-05 10:50:23,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11691.0). Total num frames: 14184448. Throughput: 0: 2910.7. Samples: 1045204. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:23,526][17621] Avg episode reward: [(0, '33.489')] +[2024-07-05 10:50:26,749][22239] Updated weights for policy 0, policy_version 3473 (0.0011) +[2024-07-05 10:50:28,525][17621] Fps is (10 sec: 11878.7, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 14245888. Throughput: 0: 2911.8. Samples: 1053770. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:28,526][17621] Avg episode reward: [(0, '32.658')] +[2024-07-05 10:50:30,220][22239] Updated weights for policy 0, policy_version 3483 (0.0011) +[2024-07-05 10:50:33,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 14303232. Throughput: 0: 2924.7. Samples: 1071776. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:33,527][17621] Avg episode reward: [(0, '30.914')] +[2024-07-05 10:50:33,695][22239] Updated weights for policy 0, policy_version 3493 (0.0013) +[2024-07-05 10:50:37,153][22239] Updated weights for policy 0, policy_version 3503 (0.0011) +[2024-07-05 10:50:38,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 14360576. Throughput: 0: 2927.5. Samples: 1089356. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:38,527][17621] Avg episode reward: [(0, '31.179')] +[2024-07-05 10:50:40,653][22239] Updated weights for policy 0, policy_version 3513 (0.0012) +[2024-07-05 10:50:43,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 14422016. Throughput: 0: 2924.1. Samples: 1098116. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:43,526][17621] Avg episode reward: [(0, '34.625')] +[2024-07-05 10:50:44,112][22239] Updated weights for policy 0, policy_version 3523 (0.0011) +[2024-07-05 10:50:47,617][22239] Updated weights for policy 0, policy_version 3533 (0.0011) +[2024-07-05 10:50:48,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 14479360. Throughput: 0: 2930.0. Samples: 1115932. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:48,526][17621] Avg episode reward: [(0, '34.909')] +[2024-07-05 10:50:51,075][22239] Updated weights for policy 0, policy_version 3543 (0.0011) +[2024-07-05 10:50:53,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 14540800. Throughput: 0: 2935.2. Samples: 1133560. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:50:53,526][17621] Avg episode reward: [(0, '36.872')] +[2024-07-05 10:50:53,529][22225] Saving new best policy, reward=36.872! +[2024-07-05 10:50:54,550][22239] Updated weights for policy 0, policy_version 3553 (0.0011) +[2024-07-05 10:50:58,016][22239] Updated weights for policy 0, policy_version 3563 (0.0011) +[2024-07-05 10:50:58,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 14598144. Throughput: 0: 2936.3. Samples: 1142498. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:50:58,527][17621] Avg episode reward: [(0, '38.247')] +[2024-07-05 10:50:58,709][22225] Saving new best policy, reward=38.247! +[2024-07-05 10:51:01,499][22239] Updated weights for policy 0, policy_version 3573 (0.0011) +[2024-07-05 10:51:03,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11732.6). Total num frames: 14655488. Throughput: 0: 2939.6. Samples: 1160082. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:51:03,526][17621] Avg episode reward: [(0, '34.732')] +[2024-07-05 10:51:04,965][22239] Updated weights for policy 0, policy_version 3583 (0.0011) +[2024-07-05 10:51:08,423][22239] Updated weights for policy 0, policy_version 3593 (0.0011) +[2024-07-05 10:51:08,526][17621] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 14716928. Throughput: 0: 2943.4. Samples: 1177656. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:51:08,526][17621] Avg episode reward: [(0, '31.727')] +[2024-07-05 10:51:11,899][22239] Updated weights for policy 0, policy_version 3603 (0.0011) +[2024-07-05 10:51:13,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 14774272. Throughput: 0: 2953.7. Samples: 1186688. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:51:13,526][17621] Avg episode reward: [(0, '32.747')] +[2024-07-05 10:51:15,361][22239] Updated weights for policy 0, policy_version 3613 (0.0011) +[2024-07-05 10:51:18,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.2, 300 sec: 11732.6). Total num frames: 14835712. Throughput: 0: 2944.2. Samples: 1204264. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:51:18,526][17621] Avg episode reward: [(0, '34.567')] +[2024-07-05 10:51:18,846][22239] Updated weights for policy 0, policy_version 3623 (0.0012) +[2024-07-05 10:51:22,309][22239] Updated weights for policy 0, policy_version 3633 (0.0011) +[2024-07-05 10:51:23,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 14893056. Throughput: 0: 2948.2. Samples: 1222024. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:23,526][17621] Avg episode reward: [(0, '33.371')] +[2024-07-05 10:51:25,794][22239] Updated weights for policy 0, policy_version 3643 (0.0011) +[2024-07-05 10:51:28,526][17621] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 14950400. Throughput: 0: 2950.0. Samples: 1230864. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:28,526][17621] Avg episode reward: [(0, '33.892')] +[2024-07-05 10:51:29,266][22239] Updated weights for policy 0, policy_version 3653 (0.0011) +[2024-07-05 10:51:32,736][22239] Updated weights for policy 0, policy_version 3663 (0.0011) +[2024-07-05 10:51:33,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 15011840. Throughput: 0: 2945.0. Samples: 1248456. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:33,527][17621] Avg episode reward: [(0, '34.873')] +[2024-07-05 10:51:36,199][22239] Updated weights for policy 0, policy_version 3673 (0.0011) +[2024-07-05 10:51:38,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15069184. Throughput: 0: 2953.4. Samples: 1266464. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:38,527][17621] Avg episode reward: [(0, '34.261')] +[2024-07-05 10:51:39,671][22239] Updated weights for policy 0, policy_version 3683 (0.0011) +[2024-07-05 10:51:43,151][22239] Updated weights for policy 0, policy_version 3693 (0.0011) +[2024-07-05 10:51:43,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 15130624. Throughput: 0: 2945.2. Samples: 1275030. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:43,526][17621] Avg episode reward: [(0, '35.402')] +[2024-07-05 10:51:46,632][22239] Updated weights for policy 0, policy_version 3703 (0.0012) +[2024-07-05 10:51:48,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 15187968. Throughput: 0: 2951.3. Samples: 1292892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:48,526][17621] Avg episode reward: [(0, '36.698')] +[2024-07-05 10:51:50,093][22239] Updated weights for policy 0, policy_version 3713 (0.0011) +[2024-07-05 10:51:53,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 15245312. Throughput: 0: 2953.5. Samples: 1310562. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:53,526][17621] Avg episode reward: [(0, '37.104')] +[2024-07-05 10:51:53,584][22239] Updated weights for policy 0, policy_version 3723 (0.0011) +[2024-07-05 10:51:57,049][22239] Updated weights for policy 0, policy_version 3733 (0.0012) +[2024-07-05 10:51:58,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11746.5). Total num frames: 15306752. Throughput: 0: 2945.8. Samples: 1319250. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:51:58,526][17621] Avg episode reward: [(0, '36.921')] +[2024-07-05 10:52:00,538][22239] Updated weights for policy 0, policy_version 3743 (0.0011) +[2024-07-05 10:52:03,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15364096. Throughput: 0: 2952.2. Samples: 1337112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:52:03,526][17621] Avg episode reward: [(0, '36.353')] +[2024-07-05 10:52:04,015][22239] Updated weights for policy 0, policy_version 3753 (0.0011) +[2024-07-05 10:52:07,470][22239] Updated weights for policy 0, policy_version 3763 (0.0011) +[2024-07-05 10:52:08,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.2, 300 sec: 11746.5). Total num frames: 15425536. Throughput: 0: 2949.0. Samples: 1354728. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:08,526][17621] Avg episode reward: [(0, '35.090')] +[2024-07-05 10:52:10,948][22239] Updated weights for policy 0, policy_version 3773 (0.0011) +[2024-07-05 10:52:13,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15482880. Throughput: 0: 2951.9. Samples: 1363698. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:52:13,527][17621] Avg episode reward: [(0, '34.143')] +[2024-07-05 10:52:13,723][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000003781_15486976.pth... +[2024-07-05 10:52:13,794][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000003094_12673024.pth +[2024-07-05 10:52:14,428][22239] Updated weights for policy 0, policy_version 3783 (0.0012) +[2024-07-05 10:52:17,913][22239] Updated weights for policy 0, policy_version 3793 (0.0011) +[2024-07-05 10:52:18,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 15540224. Throughput: 0: 2950.1. Samples: 1381210. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:18,526][17621] Avg episode reward: [(0, '33.997')] +[2024-07-05 10:52:21,382][22239] Updated weights for policy 0, policy_version 3803 (0.0011) +[2024-07-05 10:52:23,526][17621] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15601664. Throughput: 0: 2940.3. Samples: 1398778. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:23,526][17621] Avg episode reward: [(0, '35.518')] +[2024-07-05 10:52:24,855][22239] Updated weights for policy 0, policy_version 3813 (0.0011) +[2024-07-05 10:52:28,329][22239] Updated weights for policy 0, policy_version 3823 (0.0012) +[2024-07-05 10:52:28,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15659008. Throughput: 0: 2949.6. Samples: 1407764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:28,526][17621] Avg episode reward: [(0, '34.590')] +[2024-07-05 10:52:31,815][22239] Updated weights for policy 0, policy_version 3833 (0.0012) +[2024-07-05 10:52:33,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 15716352. Throughput: 0: 2942.8. Samples: 1425318. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:33,527][17621] Avg episode reward: [(0, '34.683')] +[2024-07-05 10:52:35,306][22239] Updated weights for policy 0, policy_version 3843 (0.0012) +[2024-07-05 10:52:38,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15777792. Throughput: 0: 2939.1. Samples: 1442824. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:38,527][17621] Avg episode reward: [(0, '34.328')] +[2024-07-05 10:52:38,783][22239] Updated weights for policy 0, policy_version 3853 (0.0011) +[2024-07-05 10:52:42,285][22239] Updated weights for policy 0, policy_version 3863 (0.0011) +[2024-07-05 10:52:43,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 15835136. Throughput: 0: 2946.0. Samples: 1451818. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:43,526][17621] Avg episode reward: [(0, '33.907')] +[2024-07-05 10:52:45,791][22239] Updated weights for policy 0, policy_version 3873 (0.0012) +[2024-07-05 10:52:48,526][17621] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 15892480. Throughput: 0: 2936.6. Samples: 1469258. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:48,526][17621] Avg episode reward: [(0, '35.607')] +[2024-07-05 10:52:49,315][22239] Updated weights for policy 0, policy_version 3883 (0.0012) +[2024-07-05 10:52:52,795][22239] Updated weights for policy 0, policy_version 3893 (0.0012) +[2024-07-05 10:52:53,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 15953920. Throughput: 0: 2934.7. Samples: 1486792. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:53,526][17621] Avg episode reward: [(0, '36.249')] +[2024-07-05 10:52:56,291][22239] Updated weights for policy 0, policy_version 3903 (0.0011) +[2024-07-05 10:52:58,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11732.6). Total num frames: 16011264. Throughput: 0: 2934.6. Samples: 1495754. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:52:58,526][17621] Avg episode reward: [(0, '34.536')] +[2024-07-05 10:52:59,773][22239] Updated weights for policy 0, policy_version 3913 (0.0011) +[2024-07-05 10:53:03,274][22239] Updated weights for policy 0, policy_version 3923 (0.0011) +[2024-07-05 10:53:03,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 16068608. Throughput: 0: 2934.7. Samples: 1513270. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:03,526][17621] Avg episode reward: [(0, '33.284')] +[2024-07-05 10:53:06,750][22239] Updated weights for policy 0, policy_version 3933 (0.0011) +[2024-07-05 10:53:08,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 16130048. Throughput: 0: 2934.4. Samples: 1530826. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:08,526][17621] Avg episode reward: [(0, '34.184')] +[2024-07-05 10:53:10,256][22239] Updated weights for policy 0, policy_version 3943 (0.0011) +[2024-07-05 10:53:13,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 16187392. Throughput: 0: 2929.6. Samples: 1539598. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:13,527][17621] Avg episode reward: [(0, '33.827')] +[2024-07-05 10:53:13,806][22239] Updated weights for policy 0, policy_version 3953 (0.0012) +[2024-07-05 10:53:17,350][22239] Updated weights for policy 0, policy_version 3963 (0.0012) +[2024-07-05 10:53:18,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 16244736. Throughput: 0: 2923.2. Samples: 1556862. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:18,526][17621] Avg episode reward: [(0, '36.455')] +[2024-07-05 10:53:20,882][22239] Updated weights for policy 0, policy_version 3973 (0.0012) +[2024-07-05 10:53:23,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 16302080. Throughput: 0: 2921.3. Samples: 1574280. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:23,527][17621] Avg episode reward: [(0, '34.882')] +[2024-07-05 10:53:24,409][22239] Updated weights for policy 0, policy_version 3983 (0.0012) +[2024-07-05 10:53:27,948][22239] Updated weights for policy 0, policy_version 3993 (0.0012) +[2024-07-05 10:53:28,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 16359424. Throughput: 0: 2915.7. Samples: 1583024. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:28,527][17621] Avg episode reward: [(0, '34.714')] +[2024-07-05 10:53:31,470][22239] Updated weights for policy 0, policy_version 4003 (0.0012) +[2024-07-05 10:53:33,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 16416768. Throughput: 0: 2914.6. Samples: 1600416. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:33,526][17621] Avg episode reward: [(0, '35.665')] +[2024-07-05 10:53:35,004][22239] Updated weights for policy 0, policy_version 4013 (0.0012) +[2024-07-05 10:53:38,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11605.3, 300 sec: 11704.8). Total num frames: 16474112. Throughput: 0: 2911.9. Samples: 1617828. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:38,527][17621] Avg episode reward: [(0, '35.951')] +[2024-07-05 10:53:38,549][22239] Updated weights for policy 0, policy_version 4023 (0.0012) +[2024-07-05 10:53:42,095][22239] Updated weights for policy 0, policy_version 4033 (0.0012) +[2024-07-05 10:53:43,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 16535552. Throughput: 0: 2901.5. Samples: 1626320. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:43,526][17621] Avg episode reward: [(0, '36.758')] +[2024-07-05 10:53:45,589][22239] Updated weights for policy 0, policy_version 4043 (0.0013) +[2024-07-05 10:53:48,526][17621] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 16592896. Throughput: 0: 2905.2. Samples: 1644004. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:48,526][17621] Avg episode reward: [(0, '36.450')] +[2024-07-05 10:53:49,071][22239] Updated weights for policy 0, policy_version 4053 (0.0012) +[2024-07-05 10:53:52,553][22239] Updated weights for policy 0, policy_version 4063 (0.0013) +[2024-07-05 10:53:53,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11704.9). Total num frames: 16650240. Throughput: 0: 2908.0. Samples: 1661684. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:53,526][17621] Avg episode reward: [(0, '35.086')] +[2024-07-05 10:53:56,049][22239] Updated weights for policy 0, policy_version 4073 (0.0011) +[2024-07-05 10:53:58,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 16711680. Throughput: 0: 2903.5. Samples: 1670256. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:53:58,526][17621] Avg episode reward: [(0, '37.287')] +[2024-07-05 10:53:59,540][22239] Updated weights for policy 0, policy_version 4083 (0.0012) +[2024-07-05 10:54:03,009][22239] Updated weights for policy 0, policy_version 4093 (0.0011) +[2024-07-05 10:54:03,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 16769024. Throughput: 0: 2918.9. Samples: 1688212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:03,526][17621] Avg episode reward: [(0, '38.159')] +[2024-07-05 10:54:06,493][22239] Updated weights for policy 0, policy_version 4103 (0.0012) +[2024-07-05 10:54:08,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11718.7). Total num frames: 16826368. Throughput: 0: 2922.6. Samples: 1705796. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:08,526][17621] Avg episode reward: [(0, '36.902')] +[2024-07-05 10:54:09,976][22239] Updated weights for policy 0, policy_version 4113 (0.0012) +[2024-07-05 10:54:13,481][22239] Updated weights for policy 0, policy_version 4123 (0.0012) +[2024-07-05 10:54:13,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 16887808. Throughput: 0: 2918.3. Samples: 1714348. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:13,526][17621] Avg episode reward: [(0, '37.767')] +[2024-07-05 10:54:13,529][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004123_16887808.pth... +[2024-07-05 10:54:13,602][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000003436_14073856.pth +[2024-07-05 10:54:16,980][22239] Updated weights for policy 0, policy_version 4133 (0.0011) +[2024-07-05 10:54:18,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 16945152. Throughput: 0: 2926.2. Samples: 1732094. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:54:18,527][17621] Avg episode reward: [(0, '35.990')] +[2024-07-05 10:54:20,484][22239] Updated weights for policy 0, policy_version 4143 (0.0011) +[2024-07-05 10:54:23,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 17002496. Throughput: 0: 2931.6. Samples: 1749748. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:23,526][17621] Avg episode reward: [(0, '36.294')] +[2024-07-05 10:54:23,999][22239] Updated weights for policy 0, policy_version 4153 (0.0012) +[2024-07-05 10:54:27,482][22239] Updated weights for policy 0, policy_version 4163 (0.0011) +[2024-07-05 10:54:28,525][17621] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 17059840. Throughput: 0: 2932.8. Samples: 1758294. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:54:28,526][17621] Avg episode reward: [(0, '37.143')] +[2024-07-05 10:54:30,968][22239] Updated weights for policy 0, policy_version 4173 (0.0011) +[2024-07-05 10:54:33,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11732.6). Total num frames: 17121280. Throughput: 0: 2930.3. Samples: 1775870. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:33,526][17621] Avg episode reward: [(0, '37.790')] +[2024-07-05 10:54:34,458][22239] Updated weights for policy 0, policy_version 4183 (0.0011) +[2024-07-05 10:54:37,964][22239] Updated weights for policy 0, policy_version 4193 (0.0011) +[2024-07-05 10:54:38,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 17178624. Throughput: 0: 2933.4. Samples: 1793686. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:38,527][17621] Avg episode reward: [(0, '33.890')] +[2024-07-05 10:54:41,437][22239] Updated weights for policy 0, policy_version 4203 (0.0011) +[2024-07-05 10:54:43,525][17621] Fps is (10 sec: 11469.0, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 17235968. Throughput: 0: 2934.9. Samples: 1802326. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:54:43,527][17621] Avg episode reward: [(0, '33.734')] +[2024-07-05 10:54:44,911][22239] Updated weights for policy 0, policy_version 4213 (0.0011) +[2024-07-05 10:54:48,411][22239] Updated weights for policy 0, policy_version 4223 (0.0011) +[2024-07-05 10:54:48,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 17297408. Throughput: 0: 2928.4. Samples: 1819992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:48,526][17621] Avg episode reward: [(0, '33.976')] +[2024-07-05 10:54:51,907][22239] Updated weights for policy 0, policy_version 4233 (0.0011) +[2024-07-05 10:54:53,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 17354752. Throughput: 0: 2931.9. Samples: 1837732. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:53,526][17621] Avg episode reward: [(0, '35.507')] +[2024-07-05 10:54:55,407][22239] Updated weights for policy 0, policy_version 4243 (0.0011) +[2024-07-05 10:54:58,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 17412096. Throughput: 0: 2932.5. Samples: 1846310. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:54:58,526][17621] Avg episode reward: [(0, '35.977')] +[2024-07-05 10:54:58,890][22239] Updated weights for policy 0, policy_version 4253 (0.0011) +[2024-07-05 10:55:02,374][22239] Updated weights for policy 0, policy_version 4263 (0.0011) +[2024-07-05 10:55:03,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11732.6). Total num frames: 17473536. Throughput: 0: 2929.0. Samples: 1863900. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:55:03,526][17621] Avg episode reward: [(0, '35.078')] +[2024-07-05 10:55:05,917][22239] Updated weights for policy 0, policy_version 4273 (0.0011) +[2024-07-05 10:55:08,526][17621] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11732.6). Total num frames: 17530880. Throughput: 0: 2924.8. Samples: 1881364. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:55:08,526][17621] Avg episode reward: [(0, '35.458')] +[2024-07-05 10:55:09,433][22239] Updated weights for policy 0, policy_version 4283 (0.0012) +[2024-07-05 10:55:12,960][22239] Updated weights for policy 0, policy_version 4293 (0.0012) +[2024-07-05 10:55:13,526][17621] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11732.6). Total num frames: 17588224. Throughput: 0: 2932.3. Samples: 1890246. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:55:13,526][17621] Avg episode reward: [(0, '35.649')] +[2024-07-05 10:55:16,469][22239] Updated weights for policy 0, policy_version 4303 (0.0012) +[2024-07-05 10:55:18,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11732.6). Total num frames: 17645568. Throughput: 0: 2929.5. Samples: 1907696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:55:18,527][17621] Avg episode reward: [(0, '36.062')] +[2024-07-05 10:55:19,959][22239] Updated weights for policy 0, policy_version 4313 (0.0012) +[2024-07-05 10:55:23,447][22239] Updated weights for policy 0, policy_version 4323 (0.0011) +[2024-07-05 10:55:23,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 17707008. Throughput: 0: 2923.5. Samples: 1925242. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:55:23,527][17621] Avg episode reward: [(0, '36.211')] +[2024-07-05 10:55:26,942][22239] Updated weights for policy 0, policy_version 4333 (0.0011) +[2024-07-05 10:55:28,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11732.6). Total num frames: 17764352. Throughput: 0: 2929.9. Samples: 1934172. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:55:28,527][17621] Avg episode reward: [(0, '38.604')] +[2024-07-05 10:55:28,670][22225] Saving new best policy, reward=38.604! +[2024-07-05 10:55:30,432][22239] Updated weights for policy 0, policy_version 4343 (0.0011) +[2024-07-05 10:55:33,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11732.6). Total num frames: 17821696. Throughput: 0: 2926.0. Samples: 1951662. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:55:33,526][17621] Avg episode reward: [(0, '34.447')] +[2024-07-05 10:55:33,945][22239] Updated weights for policy 0, policy_version 4353 (0.0012) +[2024-07-05 10:55:37,418][22239] Updated weights for policy 0, policy_version 4363 (0.0011) +[2024-07-05 10:55:38,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 17883136. Throughput: 0: 2921.6. Samples: 1969204. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:55:38,526][17621] Avg episode reward: [(0, '34.099')] +[2024-07-05 10:55:40,874][22239] Updated weights for policy 0, policy_version 4373 (0.0011) +[2024-07-05 10:55:43,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 17940480. Throughput: 0: 2930.8. Samples: 1978194. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:55:43,526][17621] Avg episode reward: [(0, '35.279')] +[2024-07-05 10:55:44,351][22239] Updated weights for policy 0, policy_version 4383 (0.0012) +[2024-07-05 10:55:47,825][22239] Updated weights for policy 0, policy_version 4393 (0.0011) +[2024-07-05 10:55:48,525][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 18001920. Throughput: 0: 2929.8. Samples: 1995740. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:55:48,526][17621] Avg episode reward: [(0, '38.352')] +[2024-07-05 10:55:51,306][22239] Updated weights for policy 0, policy_version 4403 (0.0011) +[2024-07-05 10:55:53,526][17621] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 18059264. Throughput: 0: 2931.5. Samples: 2013280. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:55:53,526][17621] Avg episode reward: [(0, '35.935')] +[2024-07-05 10:55:54,793][22239] Updated weights for policy 0, policy_version 4413 (0.0011) +[2024-07-05 10:55:58,394][22239] Updated weights for policy 0, policy_version 4423 (0.0012) +[2024-07-05 10:55:58,526][17621] Fps is (10 sec: 11468.3, 60 sec: 11741.8, 300 sec: 11732.6). Total num frames: 18116608. Throughput: 0: 2932.4. Samples: 2022204. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:55:58,527][17621] Avg episode reward: [(0, '35.283')] +[2024-07-05 10:56:01,944][22239] Updated weights for policy 0, policy_version 4433 (0.0012) +[2024-07-05 10:56:03,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 18173952. Throughput: 0: 2928.9. Samples: 2039498. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:03,527][17621] Avg episode reward: [(0, '34.056')] +[2024-07-05 10:56:05,469][22239] Updated weights for policy 0, policy_version 4443 (0.0011) +[2024-07-05 10:56:08,526][17621] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 18231296. Throughput: 0: 2926.5. Samples: 2056936. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:08,527][17621] Avg episode reward: [(0, '33.183')] +[2024-07-05 10:56:08,978][22239] Updated weights for policy 0, policy_version 4453 (0.0012) +[2024-07-05 10:56:12,499][22239] Updated weights for policy 0, policy_version 4463 (0.0012) +[2024-07-05 10:56:13,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 18288640. Throughput: 0: 2917.5. Samples: 2065460. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:13,527][17621] Avg episode reward: [(0, '37.524')] +[2024-07-05 10:56:13,558][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004466_18292736.pth... +[2024-07-05 10:56:13,632][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000003781_15486976.pth +[2024-07-05 10:56:16,033][22239] Updated weights for policy 0, policy_version 4473 (0.0012) +[2024-07-05 10:56:18,526][17621] Fps is (10 sec: 11469.0, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 18345984. Throughput: 0: 2915.6. Samples: 2082864. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:18,526][17621] Avg episode reward: [(0, '36.274')] +[2024-07-05 10:56:19,586][22239] Updated weights for policy 0, policy_version 4483 (0.0012) +[2024-07-05 10:56:23,108][22239] Updated weights for policy 0, policy_version 4493 (0.0012) +[2024-07-05 10:56:23,526][17621] Fps is (10 sec: 11878.7, 60 sec: 11673.6, 300 sec: 11718.7). Total num frames: 18407424. Throughput: 0: 2911.5. Samples: 2100222. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:23,526][17621] Avg episode reward: [(0, '38.506')] +[2024-07-05 10:56:26,660][22239] Updated weights for policy 0, policy_version 4503 (0.0012) +[2024-07-05 10:56:28,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 18464768. Throughput: 0: 2909.4. Samples: 2109118. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:28,526][17621] Avg episode reward: [(0, '35.978')] +[2024-07-05 10:56:30,181][22239] Updated weights for policy 0, policy_version 4513 (0.0012) +[2024-07-05 10:56:33,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 18522112. Throughput: 0: 2902.6. Samples: 2126358. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:56:33,527][17621] Avg episode reward: [(0, '38.099')] +[2024-07-05 10:56:33,742][22239] Updated weights for policy 0, policy_version 4523 (0.0012) +[2024-07-05 10:56:37,248][22239] Updated weights for policy 0, policy_version 4533 (0.0011) +[2024-07-05 10:56:38,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11691.0). Total num frames: 18579456. Throughput: 0: 2905.1. Samples: 2144010. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:56:38,526][17621] Avg episode reward: [(0, '38.060')] +[2024-07-05 10:56:40,732][22239] Updated weights for policy 0, policy_version 4543 (0.0011) +[2024-07-05 10:56:43,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 18640896. Throughput: 0: 2896.7. Samples: 2152554. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:56:43,526][17621] Avg episode reward: [(0, '40.711')] +[2024-07-05 10:56:43,528][22225] Saving new best policy, reward=40.711! +[2024-07-05 10:56:44,222][22239] Updated weights for policy 0, policy_version 4553 (0.0011) +[2024-07-05 10:56:47,723][22239] Updated weights for policy 0, policy_version 4563 (0.0011) +[2024-07-05 10:56:48,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11605.3, 300 sec: 11704.8). Total num frames: 18698240. Throughput: 0: 2903.7. Samples: 2170164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:56:48,526][17621] Avg episode reward: [(0, '39.459')] +[2024-07-05 10:56:51,262][22239] Updated weights for policy 0, policy_version 4573 (0.0014) +[2024-07-05 10:56:53,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11691.0). Total num frames: 18755584. Throughput: 0: 2904.4. Samples: 2187632. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:56:53,526][17621] Avg episode reward: [(0, '36.632')] +[2024-07-05 10:56:54,787][22239] Updated weights for policy 0, policy_version 4583 (0.0012) +[2024-07-05 10:56:58,336][22239] Updated weights for policy 0, policy_version 4593 (0.0012) +[2024-07-05 10:56:58,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.4, 300 sec: 11691.0). Total num frames: 18812928. Throughput: 0: 2909.1. Samples: 2196370. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:56:58,526][17621] Avg episode reward: [(0, '34.809')] +[2024-07-05 10:57:01,826][22239] Updated weights for policy 0, policy_version 4603 (0.0011) +[2024-07-05 10:57:03,525][17621] Fps is (10 sec: 11468.9, 60 sec: 11605.4, 300 sec: 11677.1). Total num frames: 18870272. Throughput: 0: 2910.5. Samples: 2213834. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:03,526][17621] Avg episode reward: [(0, '35.190')] +[2024-07-05 10:57:05,335][22239] Updated weights for policy 0, policy_version 4613 (0.0011) +[2024-07-05 10:57:08,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 18931712. Throughput: 0: 2912.8. Samples: 2231298. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:08,527][17621] Avg episode reward: [(0, '34.187')] +[2024-07-05 10:57:08,834][22239] Updated weights for policy 0, policy_version 4623 (0.0012) +[2024-07-05 10:57:12,351][22239] Updated weights for policy 0, policy_version 4633 (0.0012) +[2024-07-05 10:57:13,526][17621] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 18989056. Throughput: 0: 2914.2. Samples: 2240258. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 10:57:13,527][17621] Avg episode reward: [(0, '33.250')] +[2024-07-05 10:57:15,853][22239] Updated weights for policy 0, policy_version 4643 (0.0011) +[2024-07-05 10:57:18,526][17621] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 19046400. Throughput: 0: 2919.0. Samples: 2257712. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:18,527][17621] Avg episode reward: [(0, '33.154')] +[2024-07-05 10:57:19,380][22239] Updated weights for policy 0, policy_version 4653 (0.0011) +[2024-07-05 10:57:22,876][22239] Updated weights for policy 0, policy_version 4663 (0.0012) +[2024-07-05 10:57:23,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 19103744. Throughput: 0: 2914.7. Samples: 2275172. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:23,527][17621] Avg episode reward: [(0, '34.708')] +[2024-07-05 10:57:26,344][22239] Updated weights for policy 0, policy_version 4673 (0.0011) +[2024-07-05 10:57:28,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 19165184. Throughput: 0: 2921.2. Samples: 2284006. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:28,526][17621] Avg episode reward: [(0, '34.441')] +[2024-07-05 10:57:29,847][22239] Updated weights for policy 0, policy_version 4683 (0.0011) +[2024-07-05 10:57:33,344][22239] Updated weights for policy 0, policy_version 4693 (0.0012) +[2024-07-05 10:57:33,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 19222528. Throughput: 0: 2923.3. Samples: 2301712. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:33,526][17621] Avg episode reward: [(0, '33.052')] +[2024-07-05 10:57:36,843][22239] Updated weights for policy 0, policy_version 4703 (0.0011) +[2024-07-05 10:57:38,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 19279872. Throughput: 0: 2923.1. Samples: 2319172. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:38,527][17621] Avg episode reward: [(0, '34.933')] +[2024-07-05 10:57:40,358][22239] Updated weights for policy 0, policy_version 4713 (0.0014) +[2024-07-05 10:57:43,526][17621] Fps is (10 sec: 11468.6, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 19337216. Throughput: 0: 2918.5. Samples: 2327704. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:43,527][17621] Avg episode reward: [(0, '36.222')] +[2024-07-05 10:57:43,885][22239] Updated weights for policy 0, policy_version 4723 (0.0011) +[2024-07-05 10:57:47,473][22239] Updated weights for policy 0, policy_version 4733 (0.0012) +[2024-07-05 10:57:48,526][17621] Fps is (10 sec: 11468.5, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 19394560. Throughput: 0: 2917.1. Samples: 2345104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:48,527][17621] Avg episode reward: [(0, '36.659')] +[2024-07-05 10:57:51,031][22239] Updated weights for policy 0, policy_version 4743 (0.0012) +[2024-07-05 10:57:53,526][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 19456000. Throughput: 0: 2914.0. Samples: 2362426. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:53,526][17621] Avg episode reward: [(0, '36.453')] +[2024-07-05 10:57:54,561][22239] Updated weights for policy 0, policy_version 4753 (0.0012) +[2024-07-05 10:57:58,121][22239] Updated weights for policy 0, policy_version 4763 (0.0011) +[2024-07-05 10:57:58,525][17621] Fps is (10 sec: 11878.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 19513344. Throughput: 0: 2906.1. Samples: 2371034. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:57:58,526][17621] Avg episode reward: [(0, '34.101')] +[2024-07-05 10:58:01,686][22239] Updated weights for policy 0, policy_version 4773 (0.0012) +[2024-07-05 10:58:03,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 19570688. Throughput: 0: 2899.8. Samples: 2388202. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:03,527][17621] Avg episode reward: [(0, '34.025')] +[2024-07-05 10:58:05,205][22239] Updated weights for policy 0, policy_version 4783 (0.0011) +[2024-07-05 10:58:08,525][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 19628032. Throughput: 0: 2906.2. Samples: 2405950. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:08,527][17621] Avg episode reward: [(0, '36.401')] +[2024-07-05 10:58:08,677][22239] Updated weights for policy 0, policy_version 4793 (0.0011) +[2024-07-05 10:58:12,162][22239] Updated weights for policy 0, policy_version 4803 (0.0011) +[2024-07-05 10:58:13,526][17621] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 19685376. Throughput: 0: 2903.3. Samples: 2414656. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:13,526][17621] Avg episode reward: [(0, '37.105')] +[2024-07-05 10:58:13,551][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004807_19689472.pth... +[2024-07-05 10:58:13,627][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004123_16887808.pth +[2024-07-05 10:58:15,714][22239] Updated weights for policy 0, policy_version 4813 (0.0012) +[2024-07-05 10:58:18,526][17621] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 19742720. Throughput: 0: 2895.2. Samples: 2431996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:18,526][17621] Avg episode reward: [(0, '34.284')] +[2024-07-05 10:58:19,293][22239] Updated weights for policy 0, policy_version 4823 (0.0012) +[2024-07-05 10:58:22,803][22239] Updated weights for policy 0, policy_version 4833 (0.0011) +[2024-07-05 10:58:23,525][17621] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 19804160. Throughput: 0: 2894.9. Samples: 2449442. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:23,526][17621] Avg episode reward: [(0, '34.538')] +[2024-07-05 10:58:26,325][22239] Updated weights for policy 0, policy_version 4843 (0.0012) +[2024-07-05 10:58:28,525][17621] Fps is (10 sec: 11878.6, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 19861504. Throughput: 0: 2901.2. Samples: 2458256. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:28,526][17621] Avg episode reward: [(0, '36.055')] +[2024-07-05 10:58:29,961][22239] Updated weights for policy 0, policy_version 4853 (0.0012) +[2024-07-05 10:58:33,525][17621] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 19918848. Throughput: 0: 2890.9. Samples: 2475192. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:33,526][17621] Avg episode reward: [(0, '37.918')] +[2024-07-05 10:58:33,527][22239] Updated weights for policy 0, policy_version 4863 (0.0012) +[2024-07-05 10:58:37,120][22239] Updated weights for policy 0, policy_version 4873 (0.0012) +[2024-07-05 10:58:38,526][17621] Fps is (10 sec: 11059.1, 60 sec: 11537.1, 300 sec: 11649.3). Total num frames: 19972096. Throughput: 0: 2889.0. Samples: 2492432. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 10:58:38,526][17621] Avg episode reward: [(0, '37.645')] +[2024-07-05 10:58:40,633][22239] Updated weights for policy 0, policy_version 4883 (0.0011) +[2024-07-05 10:58:41,001][22225] Stopping Batcher_0... +[2024-07-05 10:58:41,002][22225] Loop batcher_evt_loop terminating... +[2024-07-05 10:58:41,001][17621] Component Batcher_0 stopped! +[2024-07-05 10:58:41,002][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004884_20004864.pth... +[2024-07-05 10:58:41,011][22241] Stopping RolloutWorker_w2... +[2024-07-05 10:58:41,011][22242] Stopping RolloutWorker_w4... +[2024-07-05 10:58:41,011][22238] Stopping RolloutWorker_w0... +[2024-07-05 10:58:41,011][22240] Stopping RolloutWorker_w1... +[2024-07-05 10:58:41,011][22241] Loop rollout_proc2_evt_loop terminating... +[2024-07-05 10:58:41,011][22242] Loop rollout_proc4_evt_loop terminating... +[2024-07-05 10:58:41,011][22238] Loop rollout_proc0_evt_loop terminating... +[2024-07-05 10:58:41,011][22244] Stopping RolloutWorker_w5... +[2024-07-05 10:58:41,011][22240] Loop rollout_proc1_evt_loop terminating... +[2024-07-05 10:58:41,011][22243] Stopping RolloutWorker_w3... +[2024-07-05 10:58:41,011][22244] Loop rollout_proc5_evt_loop terminating... +[2024-07-05 10:58:41,011][22243] Loop rollout_proc3_evt_loop terminating... +[2024-07-05 10:58:41,011][22245] Stopping RolloutWorker_w6... +[2024-07-05 10:58:41,011][17621] Component RolloutWorker_w2 stopped! +[2024-07-05 10:58:41,012][22245] Loop rollout_proc6_evt_loop terminating... +[2024-07-05 10:58:41,012][22246] Stopping RolloutWorker_w7... +[2024-07-05 10:58:41,012][22246] Loop rollout_proc7_evt_loop terminating... +[2024-07-05 10:58:41,012][17621] Component RolloutWorker_w4 stopped! +[2024-07-05 10:58:41,012][17621] Component RolloutWorker_w0 stopped! +[2024-07-05 10:58:41,013][17621] Component RolloutWorker_w1 stopped! +[2024-07-05 10:58:41,014][17621] Component RolloutWorker_w5 stopped! +[2024-07-05 10:58:41,014][17621] Component RolloutWorker_w3 stopped! +[2024-07-05 10:58:41,015][17621] Component RolloutWorker_w6 stopped! +[2024-07-05 10:58:41,016][17621] Component RolloutWorker_w7 stopped! +[2024-07-05 10:58:41,036][22239] Weights refcount: 2 0 +[2024-07-05 10:58:41,037][22239] Stopping InferenceWorker_p0-w0... +[2024-07-05 10:58:41,037][22239] Loop inference_proc0-0_evt_loop terminating... +[2024-07-05 10:58:41,037][17621] Component InferenceWorker_p0-w0 stopped! +[2024-07-05 10:58:41,096][22225] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004466_18292736.pth +[2024-07-05 10:58:41,108][22225] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004884_20004864.pth... +[2024-07-05 10:58:41,223][22225] Stopping LearnerWorker_p0... +[2024-07-05 10:58:41,224][22225] Loop learner_proc0_evt_loop terminating... +[2024-07-05 10:58:41,224][17621] Component LearnerWorker_p0 stopped! +[2024-07-05 10:58:41,226][17621] Waiting for process learner_proc0 to stop... +[2024-07-05 10:58:42,188][17621] Waiting for process inference_proc0-0 to join... +[2024-07-05 10:58:42,189][17621] Waiting for process rollout_proc0 to join... +[2024-07-05 10:58:42,189][17621] Waiting for process rollout_proc1 to join... +[2024-07-05 10:58:42,189][17621] Waiting for process rollout_proc2 to join... +[2024-07-05 10:58:42,189][17621] Waiting for process rollout_proc3 to join... +[2024-07-05 10:58:42,190][17621] Waiting for process rollout_proc4 to join... +[2024-07-05 10:58:42,190][17621] Waiting for process rollout_proc5 to join... +[2024-07-05 10:58:42,190][17621] Waiting for process rollout_proc6 to join... +[2024-07-05 10:58:42,191][17621] Waiting for process rollout_proc7 to join... +[2024-07-05 10:58:42,191][17621] Batcher 0 profile tree view: +batching: 11.5262, releasing_batches: 0.0404 +[2024-07-05 10:58:42,192][17621] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0000 - wait_policy_total: 57.6314 -update_model: 20.5515 - weight_update: 0.0010 -one_step: 0.0079 - handle_policy_step: 1280.0526 - deserialize: 93.0725, stack: 6.6231, obs_to_device_normalize: 317.1905, forward: 604.4397, send_messages: 61.8411 - prepare_outputs: 151.0372 - to_cpu: 92.1066 -[2024-07-05 14:45:25,805][03359] Learner 0 profile tree view: -misc: 0.0325, prepare_batch: 132.5906 -train: 392.1877 - epoch_init: 0.0284, minibatch_init: 0.0380, losses_postprocess: 1.6633, kl_divergence: 2.1525, after_optimizer: 1.7080 - calculate_losses: 188.4652 - losses_init: 0.0145, forward_head: 9.1082, bptt_initial: 156.2138, tail: 4.3261, advantages_returns: 1.1884, losses: 6.5924 - bptt: 7.9954 - bptt_forward_core: 7.6090 - update: 195.7529 - clip: 5.6586 -[2024-07-05 14:45:25,806][03359] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 0.6308, enqueue_policy_requests: 38.1639, env_step: 725.0510, overhead: 66.6004, complete_rollouts: 1.3416 -save_policy_outputs: 63.0363 - split_output_tensors: 29.9409 -[2024-07-05 14:45:25,806][03359] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 0.6282, enqueue_policy_requests: 39.3753, env_step: 747.1946, overhead: 69.5973, complete_rollouts: 1.7227 -save_policy_outputs: 61.3724 - split_output_tensors: 28.9288 -[2024-07-05 14:45:25,806][03359] Loop Runner_EvtLoop terminating... -[2024-07-05 14:45:25,807][03359] Runner profile tree view: -main_loop: 1413.2252 -[2024-07-05 14:45:25,807][03359] Collected {0: 450011136}, FPS: 35377.1 -[2024-07-05 14:45:41,941][03359] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 14:45:41,942][03359] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 14:45:41,943][03359] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 14:45:41,944][03359] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 14:45:41,944][03359] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 14:45:41,945][03359] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 14:45:41,945][03359] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 14:45:41,945][03359] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 14:45:41,946][03359] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 14:45:41,946][03359] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 14:45:41,946][03359] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 14:45:41,946][03359] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 14:45:41,947][03359] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 14:45:41,947][03359] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 14:45:41,947][03359] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 14:45:41,965][03359] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 14:45:41,967][03359] RunningMeanStd input shape: (1,) -[2024-07-05 14:45:41,978][03359] ConvEncoder: input_channels=3 -[2024-07-05 14:45:42,009][03359] Conv encoder output size: 512 -[2024-07-05 14:45:42,010][03359] Policy head output size: 512 -[2024-07-05 14:45:42,032][03359] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057375_450011136.pth... -[2024-07-05 14:45:42,788][03359] Num frames 100... -[2024-07-05 14:45:42,904][03359] Num frames 200... -[2024-07-05 14:45:43,017][03359] Num frames 300... -[2024-07-05 14:45:43,135][03359] Num frames 400... -[2024-07-05 14:45:43,254][03359] Num frames 500... -[2024-07-05 14:45:43,370][03359] Num frames 600... -[2024-07-05 14:45:43,489][03359] Num frames 700... -[2024-07-05 14:45:43,590][03359] Avg episode rewards: #0: 13.360, true rewards: #0: 7.360 -[2024-07-05 14:45:43,591][03359] Avg episode reward: 13.360, avg true_objective: 7.360 -[2024-07-05 14:45:43,676][03359] Num frames 800... -[2024-07-05 14:45:43,795][03359] Num frames 900... -[2024-07-05 14:45:43,914][03359] Num frames 1000... -[2024-07-05 14:45:44,051][03359] Num frames 1100... -[2024-07-05 14:45:44,167][03359] Num frames 1200... -[2024-07-05 14:45:44,282][03359] Num frames 1300... -[2024-07-05 14:45:44,393][03359] Num frames 1400... -[2024-07-05 14:45:44,493][03359] Num frames 1500... -[2024-07-05 14:45:44,598][03359] Num frames 1600... -[2024-07-05 14:45:44,703][03359] Num frames 1700... -[2024-07-05 14:45:44,806][03359] Num frames 1800... -[2024-07-05 14:45:44,908][03359] Num frames 1900... -[2024-07-05 14:45:45,007][03359] Num frames 2000... -[2024-07-05 14:45:45,114][03359] Num frames 2100... -[2024-07-05 14:45:45,221][03359] Num frames 2200... -[2024-07-05 14:45:45,331][03359] Num frames 2300... -[2024-07-05 14:45:45,440][03359] Num frames 2400... -[2024-07-05 14:45:45,547][03359] Num frames 2500... -[2024-07-05 14:45:45,656][03359] Num frames 2600... -[2024-07-05 14:45:45,763][03359] Num frames 2700... -[2024-07-05 14:45:45,869][03359] Num frames 2800... -[2024-07-05 14:45:45,962][03359] Avg episode rewards: #0: 35.680, true rewards: #0: 14.180 -[2024-07-05 14:45:45,963][03359] Avg episode reward: 35.680, avg true_objective: 14.180 -[2024-07-05 14:45:46,032][03359] Num frames 2900... -[2024-07-05 14:45:46,127][03359] Num frames 3000... -[2024-07-05 14:45:46,223][03359] Num frames 3100... -[2024-07-05 14:45:46,317][03359] Num frames 3200... -[2024-07-05 14:45:46,422][03359] Num frames 3300... -[2024-07-05 14:45:46,530][03359] Num frames 3400... -[2024-07-05 14:45:46,633][03359] Num frames 3500... -[2024-07-05 14:45:46,720][03359] Num frames 3600... -[2024-07-05 14:45:46,802][03359] Num frames 3700... -[2024-07-05 14:45:46,884][03359] Num frames 3800... -[2024-07-05 14:45:46,965][03359] Num frames 3900... -[2024-07-05 14:45:47,049][03359] Num frames 4000... -[2024-07-05 14:45:47,133][03359] Num frames 4100... -[2024-07-05 14:45:47,212][03359] Num frames 4200... -[2024-07-05 14:45:47,285][03359] Num frames 4300... -[2024-07-05 14:45:47,360][03359] Num frames 4400... -[2024-07-05 14:45:47,436][03359] Num frames 4500... -[2024-07-05 14:45:47,512][03359] Num frames 4600... -[2024-07-05 14:45:47,590][03359] Num frames 4700... -[2024-07-05 14:45:47,668][03359] Num frames 4800... -[2024-07-05 14:45:47,745][03359] Num frames 4900... -[2024-07-05 14:45:47,827][03359] Avg episode rewards: #0: 44.119, true rewards: #0: 16.453 -[2024-07-05 14:45:47,828][03359] Avg episode reward: 44.119, avg true_objective: 16.453 -[2024-07-05 14:45:47,877][03359] Num frames 5000... -[2024-07-05 14:45:47,954][03359] Num frames 5100... -[2024-07-05 14:45:48,029][03359] Num frames 5200... -[2024-07-05 14:45:48,103][03359] Num frames 5300... -[2024-07-05 14:45:48,178][03359] Num frames 5400... -[2024-07-05 14:45:48,253][03359] Num frames 5500... -[2024-07-05 14:45:48,330][03359] Num frames 5600... -[2024-07-05 14:45:48,402][03359] Num frames 5700... -[2024-07-05 14:45:48,489][03359] Num frames 5800... -[2024-07-05 14:45:48,563][03359] Num frames 5900... -[2024-07-05 14:45:48,637][03359] Num frames 6000... -[2024-07-05 14:45:48,707][03359] Num frames 6100... -[2024-07-05 14:45:48,777][03359] Num frames 6200... -[2024-07-05 14:45:48,848][03359] Num frames 6300... -[2024-07-05 14:45:48,923][03359] Num frames 6400... -[2024-07-05 14:45:48,998][03359] Num frames 6500... -[2024-07-05 14:45:49,072][03359] Num frames 6600... -[2024-07-05 14:45:49,146][03359] Num frames 6700... -[2024-07-05 14:45:49,223][03359] Num frames 6800... -[2024-07-05 14:45:49,297][03359] Num frames 6900... -[2024-07-05 14:45:49,369][03359] Num frames 7000... -[2024-07-05 14:45:49,450][03359] Avg episode rewards: #0: 49.089, true rewards: #0: 17.590 -[2024-07-05 14:45:49,451][03359] Avg episode reward: 49.089, avg true_objective: 17.590 -[2024-07-05 14:45:49,500][03359] Num frames 7100... -[2024-07-05 14:45:49,574][03359] Num frames 7200... -[2024-07-05 14:45:49,649][03359] Num frames 7300... -[2024-07-05 14:45:49,725][03359] Num frames 7400... -[2024-07-05 14:45:49,799][03359] Num frames 7500... -[2024-07-05 14:45:49,874][03359] Num frames 7600... -[2024-07-05 14:45:49,952][03359] Num frames 7700... -[2024-07-05 14:45:50,028][03359] Num frames 7800... -[2024-07-05 14:45:50,102][03359] Num frames 7900... -[2024-07-05 14:45:50,178][03359] Num frames 8000... -[2024-07-05 14:45:50,253][03359] Num frames 8100... -[2024-07-05 14:45:50,329][03359] Num frames 8200... -[2024-07-05 14:45:50,385][03359] Avg episode rewards: #0: 46.205, true rewards: #0: 16.406 -[2024-07-05 14:45:50,386][03359] Avg episode reward: 46.205, avg true_objective: 16.406 -[2024-07-05 14:45:50,458][03359] Num frames 8300... -[2024-07-05 14:45:50,533][03359] Num frames 8400... -[2024-07-05 14:45:50,607][03359] Num frames 8500... -[2024-07-05 14:45:50,681][03359] Num frames 8600... -[2024-07-05 14:45:50,758][03359] Num frames 8700... -[2024-07-05 14:45:50,833][03359] Num frames 8800... -[2024-07-05 14:45:50,914][03359] Num frames 8900... -[2024-07-05 14:45:50,989][03359] Num frames 9000... -[2024-07-05 14:45:51,064][03359] Num frames 9100... -[2024-07-05 14:45:51,141][03359] Num frames 9200... -[2024-07-05 14:45:51,215][03359] Num frames 9300... -[2024-07-05 14:45:51,290][03359] Num frames 9400... -[2024-07-05 14:45:51,365][03359] Num frames 9500... -[2024-07-05 14:45:51,440][03359] Num frames 9600... -[2024-07-05 14:45:51,516][03359] Num frames 9700... -[2024-07-05 14:45:51,592][03359] Num frames 9800... -[2024-07-05 14:45:51,670][03359] Num frames 9900... -[2024-07-05 14:45:51,748][03359] Num frames 10000... -[2024-07-05 14:45:51,823][03359] Num frames 10100... -[2024-07-05 14:45:51,901][03359] Num frames 10200... -[2024-07-05 14:45:51,982][03359] Num frames 10300... -[2024-07-05 14:45:52,038][03359] Avg episode rewards: #0: 49.004, true rewards: #0: 17.172 -[2024-07-05 14:45:52,039][03359] Avg episode reward: 49.004, avg true_objective: 17.172 -[2024-07-05 14:45:52,113][03359] Num frames 10400... -[2024-07-05 14:45:52,188][03359] Num frames 10500... -[2024-07-05 14:45:52,262][03359] Num frames 10600... -[2024-07-05 14:45:52,338][03359] Num frames 10700... -[2024-07-05 14:45:52,413][03359] Num frames 10800... -[2024-07-05 14:45:52,490][03359] Num frames 10900... -[2024-07-05 14:45:52,565][03359] Num frames 11000... -[2024-07-05 14:45:52,640][03359] Num frames 11100... -[2024-07-05 14:45:52,712][03359] Num frames 11200... -[2024-07-05 14:45:52,787][03359] Num frames 11300... -[2024-07-05 14:45:52,865][03359] Num frames 11400... -[2024-07-05 14:45:52,940][03359] Num frames 11500... -[2024-07-05 14:45:53,015][03359] Num frames 11600... -[2024-07-05 14:45:53,090][03359] Num frames 11700... -[2024-07-05 14:45:53,165][03359] Num frames 11800... -[2024-07-05 14:45:53,242][03359] Num frames 11900... -[2024-07-05 14:45:53,318][03359] Num frames 12000... -[2024-07-05 14:45:53,395][03359] Num frames 12100... -[2024-07-05 14:45:53,470][03359] Num frames 12200... -[2024-07-05 14:45:53,546][03359] Num frames 12300... -[2024-07-05 14:45:53,621][03359] Num frames 12400... -[2024-07-05 14:45:53,678][03359] Avg episode rewards: #0: 51.003, true rewards: #0: 17.719 -[2024-07-05 14:45:53,680][03359] Avg episode reward: 51.003, avg true_objective: 17.719 -[2024-07-05 14:45:53,754][03359] Num frames 12500... -[2024-07-05 14:45:53,830][03359] Num frames 12600... -[2024-07-05 14:45:53,905][03359] Num frames 12700... -[2024-07-05 14:45:53,981][03359] Num frames 12800... -[2024-07-05 14:45:54,058][03359] Num frames 12900... -[2024-07-05 14:45:54,133][03359] Num frames 13000... -[2024-07-05 14:45:54,211][03359] Num frames 13100... -[2024-07-05 14:45:54,287][03359] Num frames 13200... -[2024-07-05 14:45:54,363][03359] Num frames 13300... -[2024-07-05 14:45:54,439][03359] Num frames 13400... -[2024-07-05 14:45:54,512][03359] Num frames 13500... -[2024-07-05 14:45:54,588][03359] Num frames 13600... -[2024-07-05 14:45:54,663][03359] Num frames 13700... -[2024-07-05 14:45:54,738][03359] Num frames 13800... -[2024-07-05 14:45:54,813][03359] Num frames 13900... -[2024-07-05 14:45:54,889][03359] Num frames 14000... -[2024-07-05 14:45:54,965][03359] Num frames 14100... -[2024-07-05 14:45:55,040][03359] Num frames 14200... -[2024-07-05 14:45:55,119][03359] Num frames 14300... -[2024-07-05 14:45:55,197][03359] Num frames 14400... -[2024-07-05 14:45:55,276][03359] Num frames 14500... -[2024-07-05 14:45:55,332][03359] Avg episode rewards: #0: 52.628, true rewards: #0: 18.129 -[2024-07-05 14:45:55,334][03359] Avg episode reward: 52.628, avg true_objective: 18.129 -[2024-07-05 14:45:55,405][03359] Num frames 14600... -[2024-07-05 14:45:55,498][03359] Num frames 14700... -[2024-07-05 14:45:55,572][03359] Num frames 14800... -[2024-07-05 14:45:55,649][03359] Num frames 14900... -[2024-07-05 14:45:55,723][03359] Num frames 15000... -[2024-07-05 14:45:55,797][03359] Num frames 15100... -[2024-07-05 14:45:55,860][03359] Num frames 15200... -[2024-07-05 14:45:55,919][03359] Num frames 15300... -[2024-07-05 14:45:55,979][03359] Num frames 15400... -[2024-07-05 14:45:56,038][03359] Num frames 15500... -[2024-07-05 14:45:56,110][03359] Num frames 15600... -[2024-07-05 14:45:56,184][03359] Num frames 15700... -[2024-07-05 14:45:56,257][03359] Num frames 15800... -[2024-07-05 14:45:56,320][03359] Num frames 15900... -[2024-07-05 14:45:56,388][03359] Num frames 16000... -[2024-07-05 14:45:56,449][03359] Num frames 16100... -[2024-07-05 14:45:56,516][03359] Avg episode rewards: #0: 51.136, true rewards: #0: 17.914 -[2024-07-05 14:45:56,518][03359] Avg episode reward: 51.136, avg true_objective: 17.914 -[2024-07-05 14:45:56,565][03359] Num frames 16200... -[2024-07-05 14:45:56,624][03359] Num frames 16300... -[2024-07-05 14:45:56,683][03359] Num frames 16400... -[2024-07-05 14:45:56,742][03359] Num frames 16500... -[2024-07-05 14:45:56,801][03359] Num frames 16600... -[2024-07-05 14:45:56,862][03359] Num frames 16700... -[2024-07-05 14:45:56,922][03359] Num frames 16800... -[2024-07-05 14:45:56,983][03359] Num frames 16900... -[2024-07-05 14:45:57,042][03359] Num frames 17000... -[2024-07-05 14:45:57,103][03359] Num frames 17100... -[2024-07-05 14:45:57,163][03359] Num frames 17200... -[2024-07-05 14:45:57,223][03359] Num frames 17300... -[2024-07-05 14:45:57,284][03359] Num frames 17400... -[2024-07-05 14:45:57,343][03359] Num frames 17500... -[2024-07-05 14:45:57,403][03359] Num frames 17600... -[2024-07-05 14:45:57,463][03359] Num frames 17700... -[2024-07-05 14:45:57,525][03359] Num frames 17800... -[2024-07-05 14:45:57,599][03359] Num frames 17900... -[2024-07-05 14:45:57,676][03359] Num frames 18000... -[2024-07-05 14:45:57,753][03359] Num frames 18100... -[2024-07-05 14:45:57,831][03359] Num frames 18200... -[2024-07-05 14:45:57,904][03359] Avg episode rewards: #0: 52.022, true rewards: #0: 18.223 -[2024-07-05 14:45:57,906][03359] Avg episode reward: 52.022, avg true_objective: 18.223 -[2024-07-05 14:46:19,829][03359] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 14:54:11,991][03423] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json... -[2024-07-05 14:54:11,992][03423] Rollout worker 0 uses device cpu -[2024-07-05 14:54:11,992][03423] Rollout worker 1 uses device cpu -[2024-07-05 14:54:11,993][03423] Rollout worker 2 uses device cpu -[2024-07-05 14:54:11,993][03423] Rollout worker 3 uses device cpu -[2024-07-05 14:54:11,993][03423] Rollout worker 4 uses device cpu -[2024-07-05 14:54:11,994][03423] Rollout worker 5 uses device cpu -[2024-07-05 14:54:11,994][03423] Rollout worker 6 uses device cpu -[2024-07-05 14:54:11,994][03423] Rollout worker 7 uses device cpu -[2024-07-05 14:54:11,995][03423] Rollout worker 8 uses device cpu -[2024-07-05 14:54:11,995][03423] Rollout worker 9 uses device cpu -[2024-07-05 14:54:11,995][03423] Rollout worker 10 uses device cpu -[2024-07-05 14:54:11,996][03423] Rollout worker 11 uses device cpu -[2024-07-05 14:54:11,996][03423] Rollout worker 12 uses device cpu -[2024-07-05 14:54:11,996][03423] Rollout worker 13 uses device cpu -[2024-07-05 14:54:11,996][03423] Rollout worker 14 uses device cpu -[2024-07-05 14:54:11,997][03423] Rollout worker 15 uses device cpu -[2024-07-05 14:54:12,107][03423] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:54:12,108][03423] InferenceWorker_p0-w0: min num requests: 5 -[2024-07-05 14:54:12,180][03423] Starting all processes... -[2024-07-05 14:54:12,181][03423] Starting process learner_proc0 -[2024-07-05 14:54:12,753][03423] Starting all processes... -[2024-07-05 14:54:12,759][03423] Starting process inference_proc0-0 -[2024-07-05 14:54:12,761][03423] Starting process rollout_proc0 -[2024-07-05 14:54:12,761][03423] Starting process rollout_proc1 -[2024-07-05 14:54:12,762][03423] Starting process rollout_proc2 -[2024-07-05 14:54:12,763][03423] Starting process rollout_proc3 -[2024-07-05 14:54:12,763][03423] Starting process rollout_proc4 -[2024-07-05 14:54:12,768][03423] Starting process rollout_proc5 -[2024-07-05 14:54:12,770][03423] Starting process rollout_proc6 -[2024-07-05 14:54:12,771][03423] Starting process rollout_proc7 -[2024-07-05 14:54:12,772][03423] Starting process rollout_proc8 -[2024-07-05 14:54:12,774][03423] Starting process rollout_proc9 -[2024-07-05 14:54:12,774][03423] Starting process rollout_proc10 -[2024-07-05 14:54:12,774][03423] Starting process rollout_proc11 -[2024-07-05 14:54:12,774][03423] Starting process rollout_proc12 -[2024-07-05 14:54:12,776][03423] Starting process rollout_proc13 -[2024-07-05 14:54:12,776][03423] Starting process rollout_proc14 -[2024-07-05 14:54:12,810][03423] Starting process rollout_proc15 -[2024-07-05 14:54:16,942][04007] Worker 15 uses CPU cores [15] -[2024-07-05 14:54:17,113][03956] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:54:17,114][03956] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-07-05 14:54:17,153][03976] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:54:17,153][03976] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-07-05 14:54:17,165][04005] Worker 12 uses CPU cores [12] -[2024-07-05 14:54:17,191][03956] Num visible devices: 1 -[2024-07-05 14:54:17,206][03984] Worker 8 uses CPU cores [8] -[2024-07-05 14:54:17,249][03956] Setting fixed seed 200 -[2024-07-05 14:54:17,249][03976] Num visible devices: 1 -[2024-07-05 14:54:17,254][03979] Worker 3 uses CPU cores [3] -[2024-07-05 14:54:17,265][03956] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:54:17,265][03956] Initializing actor-critic model on device cuda:0 -[2024-07-05 14:54:17,266][03956] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 14:54:17,266][03980] Worker 4 uses CPU cores [4] -[2024-07-05 14:54:17,268][03956] RunningMeanStd input shape: (1,) -[2024-07-05 14:54:17,282][03956] ConvEncoder: input_channels=3 -[2024-07-05 14:54:17,298][04006] Worker 14 uses CPU cores [14] -[2024-07-05 14:54:17,314][04004] Worker 13 uses CPU cores [13] -[2024-07-05 14:54:17,354][03978] Worker 1 uses CPU cores [1] -[2024-07-05 14:54:17,390][03985] Worker 7 uses CPU cores [7] -[2024-07-05 14:54:17,406][03981] Worker 2 uses CPU cores [2] -[2024-07-05 14:54:17,422][03982] Worker 6 uses CPU cores [6] -[2024-07-05 14:54:17,426][03956] Conv encoder output size: 512 -[2024-07-05 14:54:17,427][03956] Policy head output size: 512 -[2024-07-05 14:54:17,436][03987] Worker 11 uses CPU cores [11] -[2024-07-05 14:54:17,438][03986] Worker 10 uses CPU cores [10] -[2024-07-05 14:54:17,450][03956] Created Actor Critic model with architecture: -[2024-07-05 14:54:17,450][03956] ActorCriticSharedWeights( + wait_policy_total: 4.7420 +update_model: 6.4541 + weight_update: 0.0011 +one_step: 0.0028 + handle_policy_step: 831.2903 + deserialize: 14.0334, stack: 1.9051, obs_to_device_normalize: 140.6688, forward: 486.9789, send_messages: 16.8591 + prepare_outputs: 158.2148 + to_cpu: 143.4491 +[2024-07-05 10:58:42,192][17621] Learner 0 profile tree view: +misc: 0.0081, prepare_batch: 24.7747 +train: 621.7223 + epoch_init: 0.0069, minibatch_init: 0.0093, losses_postprocess: 0.7488, kl_divergence: 0.5096, after_optimizer: 2.1024 + calculate_losses: 204.2291 + losses_init: 0.0040, forward_head: 3.0489, bptt_initial: 196.2611, tail: 0.9628, advantages_returns: 0.2643, losses: 1.9828 + bptt: 1.4519 + bptt_forward_core: 1.3872 + update: 413.5385 + clip: 1.8356 +[2024-07-05 10:58:42,192][17621] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.1888, enqueue_policy_requests: 10.8761, env_step: 156.2023, overhead: 13.0633, complete_rollouts: 0.3381 +save_policy_outputs: 13.9536 + split_output_tensors: 6.6527 +[2024-07-05 10:58:42,192][17621] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.2006, enqueue_policy_requests: 11.7703, env_step: 169.0861, overhead: 14.4208, complete_rollouts: 0.3581 +save_policy_outputs: 13.8412 + split_output_tensors: 6.5571 +[2024-07-05 10:58:42,193][17621] Loop Runner_EvtLoop terminating... +[2024-07-05 10:58:42,193][17621] Runner profile tree view: +main_loop: 866.2605 +[2024-07-05 10:58:42,194][17621] Collected {0: 20004864}, FPS: 11542.0 +[2024-07-05 11:00:05,182][17621] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 11:00:05,183][17621] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 11:00:05,184][17621] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 11:00:05,184][17621] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 11:00:05,184][17621] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 11:00:05,185][17621] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 11:00:05,185][17621] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 11:00:05,185][17621] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 11:00:05,186][17621] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-07-05 11:00:05,186][17621] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-07-05 11:00:05,186][17621] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 11:00:05,187][17621] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 11:00:05,187][17621] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 11:00:05,187][17621] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 11:00:05,188][17621] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 11:00:05,201][17621] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 11:00:05,202][17621] RunningMeanStd input shape: (1,) +[2024-07-05 11:00:05,208][17621] Num input channels: 3 +[2024-07-05 11:00:05,213][17621] Convolutional layer output size: 4608 +[2024-07-05 11:00:05,222][17621] Policy head output size: 512 +[2024-07-05 11:00:05,277][17621] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004884_20004864.pth... +[2024-07-05 11:00:05,866][17621] Num frames 100... +[2024-07-05 11:00:05,937][17621] Num frames 200... +[2024-07-05 11:00:06,008][17621] Num frames 300... +[2024-07-05 11:00:06,078][17621] Num frames 400... +[2024-07-05 11:00:06,150][17621] Num frames 500... +[2024-07-05 11:00:06,223][17621] Num frames 600... +[2024-07-05 11:00:06,295][17621] Num frames 700... +[2024-07-05 11:00:06,368][17621] Num frames 800... +[2024-07-05 11:00:06,442][17621] Num frames 900... +[2024-07-05 11:00:06,509][17621] Avg episode rewards: #0: 20.170, true rewards: #0: 9.170 +[2024-07-05 11:00:06,510][17621] Avg episode reward: 20.170, avg true_objective: 9.170 +[2024-07-05 11:00:06,571][17621] Num frames 1000... +[2024-07-05 11:00:06,647][17621] Num frames 1100... +[2024-07-05 11:00:06,717][17621] Num frames 1200... +[2024-07-05 11:00:06,790][17621] Num frames 1300... +[2024-07-05 11:00:06,861][17621] Num frames 1400... +[2024-07-05 11:00:06,932][17621] Num frames 1500... +[2024-07-05 11:00:07,028][17621] Avg episode rewards: #0: 15.785, true rewards: #0: 7.785 +[2024-07-05 11:00:07,029][17621] Avg episode reward: 15.785, avg true_objective: 7.785 +[2024-07-05 11:00:07,066][17621] Num frames 1600... +[2024-07-05 11:00:07,137][17621] Num frames 1700... +[2024-07-05 11:00:07,209][17621] Num frames 1800... +[2024-07-05 11:00:07,281][17621] Num frames 1900... +[2024-07-05 11:00:07,355][17621] Num frames 2000... +[2024-07-05 11:00:07,427][17621] Num frames 2100... +[2024-07-05 11:00:07,499][17621] Num frames 2200... +[2024-07-05 11:00:07,599][17621] Avg episode rewards: #0: 15.870, true rewards: #0: 7.537 +[2024-07-05 11:00:07,601][17621] Avg episode reward: 15.870, avg true_objective: 7.537 +[2024-07-05 11:00:07,637][17621] Num frames 2300... +[2024-07-05 11:00:07,707][17621] Num frames 2400... +[2024-07-05 11:00:07,780][17621] Num frames 2500... +[2024-07-05 11:00:07,853][17621] Num frames 2600... +[2024-07-05 11:00:07,926][17621] Num frames 2700... +[2024-07-05 11:00:07,997][17621] Num frames 2800... +[2024-07-05 11:00:08,070][17621] Num frames 2900... +[2024-07-05 11:00:08,143][17621] Num frames 3000... +[2024-07-05 11:00:08,218][17621] Num frames 3100... +[2024-07-05 11:00:08,290][17621] Num frames 3200... +[2024-07-05 11:00:08,363][17621] Num frames 3300... +[2024-07-05 11:00:08,473][17621] Avg episode rewards: #0: 17.448, true rewards: #0: 8.447 +[2024-07-05 11:00:08,475][17621] Avg episode reward: 17.448, avg true_objective: 8.447 +[2024-07-05 11:00:08,498][17621] Num frames 3400... +[2024-07-05 11:00:08,570][17621] Num frames 3500... +[2024-07-05 11:00:08,641][17621] Num frames 3600... +[2024-07-05 11:00:08,712][17621] Num frames 3700... +[2024-07-05 11:00:08,812][17621] Avg episode rewards: #0: 14.726, true rewards: #0: 7.526 +[2024-07-05 11:00:08,813][17621] Avg episode reward: 14.726, avg true_objective: 7.526 +[2024-07-05 11:00:08,846][17621] Num frames 3800... +[2024-07-05 11:00:08,923][17621] Num frames 3900... +[2024-07-05 11:00:08,997][17621] Num frames 4000... +[2024-07-05 11:00:09,075][17621] Num frames 4100... +[2024-07-05 11:00:09,150][17621] Num frames 4200... +[2024-07-05 11:00:09,226][17621] Num frames 4300... +[2024-07-05 11:00:09,313][17621] Num frames 4400... +[2024-07-05 11:00:09,385][17621] Num frames 4500... +[2024-07-05 11:00:09,457][17621] Num frames 4600... +[2024-07-05 11:00:09,532][17621] Num frames 4700... +[2024-07-05 11:00:09,608][17621] Num frames 4800... +[2024-07-05 11:00:09,683][17621] Num frames 4900... +[2024-07-05 11:00:09,756][17621] Num frames 5000... +[2024-07-05 11:00:09,832][17621] Num frames 5100... +[2024-07-05 11:00:09,904][17621] Num frames 5200... +[2024-07-05 11:00:09,981][17621] Num frames 5300... +[2024-07-05 11:00:10,053][17621] Num frames 5400... +[2024-07-05 11:00:10,131][17621] Num frames 5500... +[2024-07-05 11:00:10,206][17621] Num frames 5600... +[2024-07-05 11:00:10,290][17621] Num frames 5700... +[2024-07-05 11:00:10,367][17621] Num frames 5800... +[2024-07-05 11:00:10,468][17621] Avg episode rewards: #0: 21.105, true rewards: #0: 9.772 +[2024-07-05 11:00:10,469][17621] Avg episode reward: 21.105, avg true_objective: 9.772 +[2024-07-05 11:00:10,504][17621] Num frames 5900... +[2024-07-05 11:00:10,581][17621] Num frames 6000... +[2024-07-05 11:00:10,656][17621] Num frames 6100... +[2024-07-05 11:00:10,731][17621] Num frames 6200... +[2024-07-05 11:00:10,804][17621] Num frames 6300... +[2024-07-05 11:00:10,876][17621] Num frames 6400... +[2024-07-05 11:00:10,951][17621] Num frames 6500... +[2024-07-05 11:00:11,022][17621] Num frames 6600... +[2024-07-05 11:00:11,094][17621] Num frames 6700... +[2024-07-05 11:00:11,168][17621] Num frames 6800... +[2024-07-05 11:00:11,240][17621] Num frames 6900... +[2024-07-05 11:00:11,313][17621] Num frames 7000... +[2024-07-05 11:00:11,389][17621] Num frames 7100... +[2024-07-05 11:00:11,462][17621] Num frames 7200... +[2024-07-05 11:00:11,537][17621] Num frames 7300... +[2024-07-05 11:00:11,614][17621] Num frames 7400... +[2024-07-05 11:00:11,688][17621] Num frames 7500... +[2024-07-05 11:00:11,761][17621] Num frames 7600... +[2024-07-05 11:00:11,836][17621] Num frames 7700... +[2024-07-05 11:00:11,911][17621] Num frames 7800... +[2024-07-05 11:00:11,984][17621] Num frames 7900... +[2024-07-05 11:00:12,084][17621] Avg episode rewards: #0: 25.518, true rewards: #0: 11.376 +[2024-07-05 11:00:12,086][17621] Avg episode reward: 25.518, avg true_objective: 11.376 +[2024-07-05 11:00:12,120][17621] Num frames 8000... +[2024-07-05 11:00:12,190][17621] Num frames 8100... +[2024-07-05 11:00:12,264][17621] Num frames 8200... +[2024-07-05 11:00:12,345][17621] Num frames 8300... +[2024-07-05 11:00:12,415][17621] Num frames 8400... +[2024-07-05 11:00:12,492][17621] Num frames 8500... +[2024-07-05 11:00:12,569][17621] Num frames 8600... +[2024-07-05 11:00:12,642][17621] Num frames 8700... +[2024-07-05 11:00:12,714][17621] Num frames 8800... +[2024-07-05 11:00:12,787][17621] Num frames 8900... +[2024-07-05 11:00:12,861][17621] Num frames 9000... +[2024-07-05 11:00:12,935][17621] Num frames 9100... +[2024-07-05 11:00:13,023][17621] Avg episode rewards: #0: 25.434, true rewards: #0: 11.434 +[2024-07-05 11:00:13,025][17621] Avg episode reward: 25.434, avg true_objective: 11.434 +[2024-07-05 11:00:13,065][17621] Num frames 9200... +[2024-07-05 11:00:13,137][17621] Num frames 9300... +[2024-07-05 11:00:13,211][17621] Num frames 9400... +[2024-07-05 11:00:13,281][17621] Num frames 9500... +[2024-07-05 11:00:13,354][17621] Num frames 9600... +[2024-07-05 11:00:13,424][17621] Num frames 9700... +[2024-07-05 11:00:13,496][17621] Num frames 9800... +[2024-07-05 11:00:13,611][17621] Avg episode rewards: #0: 24.314, true rewards: #0: 10.981 +[2024-07-05 11:00:13,612][17621] Avg episode reward: 24.314, avg true_objective: 10.981 +[2024-07-05 11:00:13,628][17621] Num frames 9900... +[2024-07-05 11:00:13,703][17621] Num frames 10000... +[2024-07-05 11:00:13,775][17621] Num frames 10100... +[2024-07-05 11:00:13,846][17621] Num frames 10200... +[2024-07-05 11:00:13,919][17621] Num frames 10300... +[2024-07-05 11:00:13,992][17621] Num frames 10400... +[2024-07-05 11:00:14,065][17621] Num frames 10500... +[2024-07-05 11:00:14,137][17621] Num frames 10600... +[2024-07-05 11:00:14,211][17621] Num frames 10700... +[2024-07-05 11:00:14,284][17621] Num frames 10800... +[2024-07-05 11:00:14,360][17621] Num frames 10900... +[2024-07-05 11:00:14,434][17621] Num frames 11000... +[2024-07-05 11:00:14,507][17621] Num frames 11100... +[2024-07-05 11:00:14,580][17621] Num frames 11200... +[2024-07-05 11:00:14,652][17621] Num frames 11300... +[2024-07-05 11:00:14,729][17621] Num frames 11400... +[2024-07-05 11:00:14,800][17621] Num frames 11500... +[2024-07-05 11:00:14,875][17621] Num frames 11600... +[2024-07-05 11:00:14,949][17621] Num frames 11700... +[2024-07-05 11:00:15,024][17621] Num frames 11800... +[2024-07-05 11:00:15,103][17621] Num frames 11900... +[2024-07-05 11:00:15,218][17621] Avg episode rewards: #0: 27.083, true rewards: #0: 11.983 +[2024-07-05 11:00:15,219][17621] Avg episode reward: 27.083, avg true_objective: 11.983 +[2024-07-05 11:00:27,747][17621] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/replay.mp4! +[2024-07-05 15:58:39,084][04005] Saving configuration to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json... +[2024-07-05 15:58:39,086][04005] Rollout worker 0 uses device cpu +[2024-07-05 15:58:39,086][04005] Rollout worker 1 uses device cpu +[2024-07-05 15:58:39,086][04005] Rollout worker 2 uses device cpu +[2024-07-05 15:58:39,087][04005] Rollout worker 3 uses device cpu +[2024-07-05 15:58:39,087][04005] Rollout worker 4 uses device cpu +[2024-07-05 15:58:39,087][04005] Rollout worker 5 uses device cpu +[2024-07-05 15:58:39,088][04005] Rollout worker 6 uses device cpu +[2024-07-05 15:58:39,088][04005] Rollout worker 7 uses device cpu +[2024-07-05 15:58:39,130][04005] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 15:58:39,131][04005] InferenceWorker_p0-w0: min num requests: 2 +[2024-07-05 15:58:39,157][04005] Starting all processes... +[2024-07-05 15:58:39,157][04005] Starting process learner_proc0 +[2024-07-05 15:58:39,884][04005] Starting all processes... +[2024-07-05 15:58:39,889][04005] Starting process inference_proc0-0 +[2024-07-05 15:58:39,889][04005] Starting process rollout_proc0 +[2024-07-05 15:58:39,890][04005] Starting process rollout_proc1 +[2024-07-05 15:58:39,890][04005] Starting process rollout_proc2 +[2024-07-05 15:58:39,890][04005] Starting process rollout_proc3 +[2024-07-05 15:58:39,891][04005] Starting process rollout_proc4 +[2024-07-05 15:58:39,891][04005] Starting process rollout_proc5 +[2024-07-05 15:58:39,892][04005] Starting process rollout_proc6 +[2024-07-05 15:58:39,894][04005] Starting process rollout_proc7 +[2024-07-05 15:58:42,480][04599] Worker 3 uses CPU cores [6, 7] +[2024-07-05 15:58:42,497][04596] Worker 2 uses CPU cores [4, 5] +[2024-07-05 15:58:42,498][04581] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 15:58:42,498][04581] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-07-05 15:58:42,548][04601] Worker 7 uses CPU cores [14, 15] +[2024-07-05 15:58:42,553][04581] Num visible devices: 1 +[2024-07-05 15:58:42,567][04595] Worker 0 uses CPU cores [0, 1] +[2024-07-05 15:58:42,585][04581] Setting fixed seed 200 +[2024-07-05 15:58:42,588][04581] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 15:58:42,588][04581] Initializing actor-critic model on device cuda:0 +[2024-07-05 15:58:42,588][04581] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 15:58:42,589][04581] RunningMeanStd input shape: (1,) +[2024-07-05 15:58:42,598][04581] Num input channels: 3 +[2024-07-05 15:58:42,599][04602] Worker 6 uses CPU cores [12, 13] +[2024-07-05 15:58:42,630][04581] Convolutional layer output size: 4608 +[2024-07-05 15:58:42,644][04581] Policy head output size: 512 +[2024-07-05 15:58:42,718][04598] Worker 4 uses CPU cores [8, 9] +[2024-07-05 15:58:42,762][04581] Created Actor Critic model with architecture: +[2024-07-05 15:58:42,762][04581] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -19264,23 +4731,67 @@ main_loop: 1413.2252 ) (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) (encoder): VizdoomEncoder( - (basic_encoder): ConvEncoder( - (enc): RecursiveScriptModule( - original_name=ConvEncoderImpl - (conv_head): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Conv2d) - (1): RecursiveScriptModule(original_name=ELU) - (2): RecursiveScriptModule(original_name=Conv2d) - (3): RecursiveScriptModule(original_name=ELU) - (4): RecursiveScriptModule(original_name=Conv2d) - (5): RecursiveScriptModule(original_name=ELU) + (basic_encoder): ResnetEncoder( + (conv_head): Sequential( + (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (2): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (3): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (6): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (7): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) - (mlp_layers): RecursiveScriptModule( - original_name=Sequential - (0): RecursiveScriptModule(original_name=Linear) - (1): RecursiveScriptModule(original_name=ELU) + (8): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (9): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) + (10): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) ) + (11): ResBlock( + (res_block_core): Sequential( + (0): ELU(alpha=1.0) + (1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + (2): ELU(alpha=1.0) + (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + ) + ) + (12): ELU(alpha=1.0) + ) + (mlp_layers): Sequential( + (0): Linear(in_features=4608, out_features=512, bias=True) + (1): ELU(alpha=1.0) ) ) ) @@ -19295,3932 +4806,3909 @@ main_loop: 1413.2252 (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-07-05 14:54:17,551][03983] Worker 5 uses CPU cores [5] -[2024-07-05 14:54:17,583][03977] Worker 0 uses CPU cores [0] -[2024-07-05 14:54:17,589][03956] Using optimizer -[2024-07-05 14:54:17,618][03988] Worker 9 uses CPU cores [9] -[2024-07-05 14:54:18,093][03956] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057375_450011136.pth... -[2024-07-05 14:54:18,140][03956] Loading model from checkpoint -[2024-07-05 14:54:18,141][03956] Loaded experiment state at self.train_step=57375, self.env_steps=450011136 -[2024-07-05 14:54:18,141][03956] Initialized policy 0 weights for model version 57375 -[2024-07-05 14:54:18,142][03956] LearnerWorker_p0 finished initialization! -[2024-07-05 14:54:18,142][03956] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-07-05 14:54:18,230][03976] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 14:54:18,230][03976] RunningMeanStd input shape: (1,) -[2024-07-05 14:54:18,237][03976] ConvEncoder: input_channels=3 -[2024-07-05 14:54:18,292][03976] Conv encoder output size: 512 -[2024-07-05 14:54:18,292][03976] Policy head output size: 512 -[2024-07-05 14:54:18,328][03423] Inference worker 0-0 is ready! -[2024-07-05 14:54:18,328][03423] All inference workers are ready! Signal rollout workers to start! -[2024-07-05 14:54:18,399][03983] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,404][03985] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,405][03988] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,405][04006] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,406][03979] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,406][04005] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,407][03978] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,408][03977] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,410][03984] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,411][03987] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,414][03981] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,418][03986] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,420][03980] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,423][04004] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,422][04007] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,423][03982] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 14:54:18,783][03987] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,130][03980] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,132][04006] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,132][03985] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,133][03978] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,133][04005] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,135][03988] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,135][03979] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,137][03984] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,322][03988] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,326][03979] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,335][03987] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,341][03977] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,385][04004] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,386][04007] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,388][04005] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,512][03984] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,513][03985] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,519][03977] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,523][03987] Decorrelating experience for 64 frames... -[2024-07-05 14:54:19,567][03981] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,567][04004] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,588][03983] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,722][03985] Decorrelating experience for 64 frames... -[2024-07-05 14:54:19,743][03984] Decorrelating experience for 64 frames... -[2024-07-05 14:54:19,758][03983] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,765][03977] Decorrelating experience for 64 frames... -[2024-07-05 14:54:19,770][03978] Decorrelating experience for 32 frames... -[2024-07-05 14:54:19,833][03982] Decorrelating experience for 0 frames... -[2024-07-05 14:54:19,922][03987] Decorrelating experience for 96 frames... -[2024-07-05 14:54:19,948][03983] Decorrelating experience for 64 frames... -[2024-07-05 14:54:19,968][03977] Decorrelating experience for 96 frames... -[2024-07-05 14:54:19,991][04005] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,017][03988] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,021][04004] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,172][03982] Decorrelating experience for 32 frames... -[2024-07-05 14:54:20,196][03980] Decorrelating experience for 32 frames... -[2024-07-05 14:54:20,207][03981] Decorrelating experience for 32 frames... -[2024-07-05 14:54:20,229][03986] Decorrelating experience for 0 frames... -[2024-07-05 14:54:20,233][03978] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,234][03983] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,254][04004] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,425][04005] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,428][03982] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,436][04006] Decorrelating experience for 32 frames... -[2024-07-05 14:54:20,436][03977] Decorrelating experience for 128 frames... -[2024-07-05 14:54:20,476][03987] Decorrelating experience for 128 frames... -[2024-07-05 14:54:20,528][03983] Decorrelating experience for 128 frames... -[2024-07-05 14:54:20,551][03986] Decorrelating experience for 32 frames... -[2024-07-05 14:54:20,651][03979] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,667][03978] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,673][04004] Decorrelating experience for 128 frames... -[2024-07-05 14:54:20,696][03988] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,724][03982] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,741][03985] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,756][03987] Decorrelating experience for 160 frames... -[2024-07-05 14:54:20,912][04007] Decorrelating experience for 32 frames... -[2024-07-05 14:54:20,932][03984] Decorrelating experience for 96 frames... -[2024-07-05 14:54:20,938][03983] Decorrelating experience for 160 frames... -[2024-07-05 14:54:20,957][03981] Decorrelating experience for 64 frames... -[2024-07-05 14:54:20,964][03979] Decorrelating experience for 96 frames... -[2024-07-05 14:54:21,012][03986] Decorrelating experience for 64 frames... -[2024-07-05 14:54:21,047][03980] Decorrelating experience for 64 frames... -[2024-07-05 14:54:21,113][03982] Decorrelating experience for 128 frames... -[2024-07-05 14:54:21,163][04006] Decorrelating experience for 64 frames... -[2024-07-05 14:54:21,168][03423] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 450011136. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-07-05 14:54:21,176][04005] Decorrelating experience for 128 frames... -[2024-07-05 14:54:21,182][03987] Decorrelating experience for 192 frames... -[2024-07-05 14:54:21,222][03988] Decorrelating experience for 128 frames... -[2024-07-05 14:54:21,272][03981] Decorrelating experience for 96 frames... -[2024-07-05 14:54:21,350][03985] Decorrelating experience for 128 frames... -[2024-07-05 14:54:21,412][03979] Decorrelating experience for 128 frames... -[2024-07-05 14:54:21,415][03984] Decorrelating experience for 128 frames... -[2024-07-05 14:54:21,450][03980] Decorrelating experience for 96 frames... -[2024-07-05 14:54:21,466][03982] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,474][04005] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,526][03983] Decorrelating experience for 192 frames... -[2024-07-05 14:54:21,547][03987] Decorrelating experience for 224 frames... -[2024-07-05 14:54:21,600][03988] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,664][04004] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,711][03979] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,716][03977] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,779][03984] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,836][03982] Decorrelating experience for 192 frames... -[2024-07-05 14:54:21,932][03983] Decorrelating experience for 224 frames... -[2024-07-05 14:54:21,960][03985] Decorrelating experience for 160 frames... -[2024-07-05 14:54:21,974][03980] Decorrelating experience for 128 frames... -[2024-07-05 14:54:22,030][04004] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,069][04007] Decorrelating experience for 64 frames... -[2024-07-05 14:54:22,069][04006] Decorrelating experience for 96 frames... -[2024-07-05 14:54:22,083][03977] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,174][04005] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,217][03988] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,297][03979] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,297][03981] Decorrelating experience for 128 frames... -[2024-07-05 14:54:22,299][03980] Decorrelating experience for 160 frames... -[2024-07-05 14:54:22,310][03986] Decorrelating experience for 96 frames... -[2024-07-05 14:54:22,462][03982] Decorrelating experience for 224 frames... -[2024-07-05 14:54:22,467][04006] Decorrelating experience for 128 frames... -[2024-07-05 14:54:22,541][03988] Decorrelating experience for 224 frames... -[2024-07-05 14:54:22,554][04005] Decorrelating experience for 224 frames... -[2024-07-05 14:54:22,588][04007] Decorrelating experience for 96 frames... -[2024-07-05 14:54:22,594][04004] Decorrelating experience for 224 frames... -[2024-07-05 14:54:22,622][03980] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,836][03985] Decorrelating experience for 192 frames... -[2024-07-05 14:54:22,866][03986] Decorrelating experience for 128 frames... -[2024-07-05 14:54:22,888][03978] Decorrelating experience for 128 frames... -[2024-07-05 14:54:22,944][03981] Decorrelating experience for 160 frames... -[2024-07-05 14:54:22,996][03979] Decorrelating experience for 224 frames... -[2024-07-05 14:54:23,179][03984] Decorrelating experience for 192 frames... -[2024-07-05 14:54:23,269][03977] Decorrelating experience for 224 frames... -[2024-07-05 14:54:23,281][03986] Decorrelating experience for 160 frames... -[2024-07-05 14:54:23,318][04007] Decorrelating experience for 128 frames... -[2024-07-05 14:54:23,366][03985] Decorrelating experience for 224 frames... -[2024-07-05 14:54:23,421][03978] Decorrelating experience for 160 frames... -[2024-07-05 14:54:23,584][03980] Decorrelating experience for 224 frames... -[2024-07-05 14:54:23,702][03981] Decorrelating experience for 192 frames... -[2024-07-05 14:54:23,745][03986] Decorrelating experience for 192 frames... -[2024-07-05 14:54:23,859][03978] Decorrelating experience for 192 frames... -[2024-07-05 14:54:23,970][04006] Decorrelating experience for 160 frames... -[2024-07-05 14:54:24,063][04007] Decorrelating experience for 160 frames... -[2024-07-05 14:54:24,131][03981] Decorrelating experience for 224 frames... -[2024-07-05 14:54:24,153][03956] Signal inference workers to stop experience collection... -[2024-07-05 14:54:24,163][03976] InferenceWorker_p0-w0: stopping experience collection -[2024-07-05 14:54:24,177][03986] Decorrelating experience for 224 frames... -[2024-07-05 14:54:24,260][03978] Decorrelating experience for 224 frames... -[2024-07-05 14:54:24,280][03984] Decorrelating experience for 224 frames... -[2024-07-05 14:54:24,413][04007] Decorrelating experience for 192 frames... -[2024-07-05 14:54:24,539][04006] Decorrelating experience for 192 frames... -[2024-07-05 14:54:24,718][04007] Decorrelating experience for 224 frames... -[2024-07-05 14:54:24,848][04006] Decorrelating experience for 224 frames... -[2024-07-05 14:54:25,813][03956] Signal inference workers to resume experience collection... -[2024-07-05 14:54:25,814][03976] InferenceWorker_p0-w0: resuming experience collection -[2024-07-05 14:54:26,168][03423] Fps is (10 sec: 1638.4, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 450019328. Throughput: 0: 1153.6. Samples: 5768. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:26,168][03423] Avg episode reward: [(0, '0.894')] -[2024-07-05 14:54:28,279][03976] Updated weights for policy 0, policy_version 57385 (0.0102) -[2024-07-05 14:54:30,718][03976] Updated weights for policy 0, policy_version 57395 (0.0009) -[2024-07-05 14:54:31,168][03423] Fps is (10 sec: 17202.7, 60 sec: 17202.7, 300 sec: 17202.7). Total num frames: 450183168. Throughput: 0: 4035.1. Samples: 40352. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:31,169][03423] Avg episode reward: [(0, '16.075')] -[2024-07-05 14:54:32,101][03423] Heartbeat connected on Batcher_0 -[2024-07-05 14:54:32,126][03423] Heartbeat connected on RolloutWorker_w1 -[2024-07-05 14:54:32,127][03423] Heartbeat connected on RolloutWorker_w4 -[2024-07-05 14:54:32,140][03423] Heartbeat connected on RolloutWorker_w2 -[2024-07-05 14:54:32,141][03423] Heartbeat connected on RolloutWorker_w5 -[2024-07-05 14:54:32,143][03423] Heartbeat connected on RolloutWorker_w7 -[2024-07-05 14:54:32,144][03423] Heartbeat connected on InferenceWorker_p0-w0 -[2024-07-05 14:54:32,146][03423] Heartbeat connected on RolloutWorker_w0 -[2024-07-05 14:54:32,157][03423] Heartbeat connected on RolloutWorker_w8 -[2024-07-05 14:54:32,163][03423] Heartbeat connected on RolloutWorker_w9 -[2024-07-05 14:54:32,164][03423] Heartbeat connected on RolloutWorker_w6 -[2024-07-05 14:54:32,168][03423] Heartbeat connected on RolloutWorker_w10 -[2024-07-05 14:54:32,171][03423] Heartbeat connected on RolloutWorker_w12 -[2024-07-05 14:54:32,174][03423] Heartbeat connected on RolloutWorker_w13 -[2024-07-05 14:54:32,175][03423] Heartbeat connected on RolloutWorker_w3 -[2024-07-05 14:54:32,180][03423] Heartbeat connected on RolloutWorker_w15 -[2024-07-05 14:54:32,181][03423] Heartbeat connected on RolloutWorker_w11 -[2024-07-05 14:54:32,187][03423] Heartbeat connected on RolloutWorker_w14 -[2024-07-05 14:54:32,189][03423] Heartbeat connected on LearnerWorker_p0 -[2024-07-05 14:54:33,142][03976] Updated weights for policy 0, policy_version 57405 (0.0009) -[2024-07-05 14:54:35,712][03976] Updated weights for policy 0, policy_version 57415 (0.0012) -[2024-07-05 14:54:36,168][03423] Fps is (10 sec: 33587.2, 60 sec: 22937.6, 300 sec: 22937.6). Total num frames: 450355200. Throughput: 0: 4407.5. Samples: 66112. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:36,169][03423] Avg episode reward: [(0, '53.339')] -[2024-07-05 14:54:37,918][03976] Updated weights for policy 0, policy_version 57425 (0.0011) -[2024-07-05 14:54:40,045][03976] Updated weights for policy 0, policy_version 57435 (0.0009) -[2024-07-05 14:54:41,171][03423] Fps is (10 sec: 35214.9, 60 sec: 26210.0, 300 sec: 26210.0). Total num frames: 450535424. Throughput: 0: 5935.2. Samples: 118724. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:41,173][03423] Avg episode reward: [(0, '55.334')] -[2024-07-05 14:54:42,227][03976] Updated weights for policy 0, policy_version 57445 (0.0009) -[2024-07-05 14:54:44,395][03976] Updated weights for policy 0, policy_version 57455 (0.0009) -[2024-07-05 14:54:46,168][03423] Fps is (10 sec: 37683.1, 60 sec: 28835.8, 300 sec: 28835.8). Total num frames: 450732032. Throughput: 0: 6993.9. Samples: 174848. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:46,168][03423] Avg episode reward: [(0, '52.575')] -[2024-07-05 14:54:46,595][03976] Updated weights for policy 0, policy_version 57465 (0.0013) -[2024-07-05 14:54:48,810][03976] Updated weights for policy 0, policy_version 57475 (0.0009) -[2024-07-05 14:54:51,177][03423] Fps is (10 sec: 36849.0, 60 sec: 29756.9, 300 sec: 29756.9). Total num frames: 450904064. Throughput: 0: 6757.8. Samples: 202784. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:51,182][03423] Avg episode reward: [(0, '55.054')] -[2024-07-05 14:54:51,223][03976] Updated weights for policy 0, policy_version 57485 (0.0014) -[2024-07-05 14:54:53,638][03976] Updated weights for policy 0, policy_version 57495 (0.0014) -[2024-07-05 14:54:56,112][03976] Updated weights for policy 0, policy_version 57505 (0.0011) -[2024-07-05 14:54:56,168][03423] Fps is (10 sec: 34405.7, 60 sec: 30427.3, 300 sec: 30427.3). Total num frames: 451076096. Throughput: 0: 7264.2. Samples: 254248. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:54:56,169][03423] Avg episode reward: [(0, '54.890')] -[2024-07-05 14:54:58,466][03976] Updated weights for policy 0, policy_version 57515 (0.0009) -[2024-07-05 14:55:00,852][03976] Updated weights for policy 0, policy_version 57525 (0.0010) -[2024-07-05 14:55:01,168][03423] Fps is (10 sec: 34432.0, 60 sec: 30924.8, 300 sec: 30924.8). Total num frames: 451248128. Throughput: 0: 7638.9. Samples: 305556. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-07-05 14:55:01,169][03423] Avg episode reward: [(0, '54.842')] -[2024-07-05 14:55:03,151][03976] Updated weights for policy 0, policy_version 57535 (0.0012) -[2024-07-05 14:55:05,500][03976] Updated weights for policy 0, policy_version 57545 (0.0014) -[2024-07-05 14:55:06,168][03423] Fps is (10 sec: 35226.2, 60 sec: 31493.7, 300 sec: 31493.7). Total num frames: 451428352. Throughput: 0: 7378.0. Samples: 332008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:06,169][03423] Avg episode reward: [(0, '53.592')] -[2024-07-05 14:55:07,758][03976] Updated weights for policy 0, policy_version 57555 (0.0009) -[2024-07-05 14:55:09,913][03976] Updated weights for policy 0, policy_version 57565 (0.0008) -[2024-07-05 14:55:11,167][03423] Fps is (10 sec: 36045.1, 60 sec: 31948.8, 300 sec: 31948.8). Total num frames: 451608576. Throughput: 0: 8471.0. Samples: 386964. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:11,169][03423] Avg episode reward: [(0, '54.978')] -[2024-07-05 14:55:12,307][03976] Updated weights for policy 0, policy_version 57575 (0.0009) -[2024-07-05 14:55:14,566][03976] Updated weights for policy 0, policy_version 57585 (0.0013) -[2024-07-05 14:55:16,168][03423] Fps is (10 sec: 36044.5, 60 sec: 32321.1, 300 sec: 32321.1). Total num frames: 451788800. Throughput: 0: 8870.8. Samples: 439536. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:16,169][03423] Avg episode reward: [(0, '51.653')] -[2024-07-05 14:55:16,809][03976] Updated weights for policy 0, policy_version 57595 (0.0013) -[2024-07-05 14:55:18,981][03976] Updated weights for policy 0, policy_version 57605 (0.0010) -[2024-07-05 14:55:21,153][03976] Updated weights for policy 0, policy_version 57615 (0.0009) -[2024-07-05 14:55:21,167][03423] Fps is (10 sec: 36864.0, 60 sec: 32768.0, 300 sec: 32768.0). Total num frames: 451977216. Throughput: 0: 8920.4. Samples: 467528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:21,168][03423] Avg episode reward: [(0, '54.931')] -[2024-07-05 14:55:23,360][03976] Updated weights for policy 0, policy_version 57625 (0.0012) -[2024-07-05 14:55:25,610][03976] Updated weights for policy 0, policy_version 57635 (0.0009) -[2024-07-05 14:55:26,168][03423] Fps is (10 sec: 36864.2, 60 sec: 35635.2, 300 sec: 33020.0). Total num frames: 452157440. Throughput: 0: 8995.1. Samples: 523472. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:26,169][03423] Avg episode reward: [(0, '55.077')] -[2024-07-05 14:55:27,867][03976] Updated weights for policy 0, policy_version 57645 (0.0010) -[2024-07-05 14:55:30,137][03976] Updated weights for policy 0, policy_version 57655 (0.0012) -[2024-07-05 14:55:31,168][03423] Fps is (10 sec: 36044.4, 60 sec: 35908.4, 300 sec: 33236.1). Total num frames: 452337664. Throughput: 0: 8958.8. Samples: 577996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:31,169][03423] Avg episode reward: [(0, '54.934')] -[2024-07-05 14:55:32,388][03976] Updated weights for policy 0, policy_version 57665 (0.0012) -[2024-07-05 14:55:34,619][03976] Updated weights for policy 0, policy_version 57675 (0.0014) -[2024-07-05 14:55:36,168][03423] Fps is (10 sec: 36864.0, 60 sec: 36181.3, 300 sec: 33532.6). Total num frames: 452526080. Throughput: 0: 8946.0. Samples: 605288. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:36,169][03423] Avg episode reward: [(0, '55.470')] -[2024-07-05 14:55:36,759][03976] Updated weights for policy 0, policy_version 57685 (0.0010) -[2024-07-05 14:55:39,016][03976] Updated weights for policy 0, policy_version 57695 (0.0009) -[2024-07-05 14:55:41,168][03423] Fps is (10 sec: 36863.7, 60 sec: 36183.3, 300 sec: 33689.5). Total num frames: 452706304. Throughput: 0: 9045.3. Samples: 661284. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:41,169][03423] Avg episode reward: [(0, '52.742')] -[2024-07-05 14:55:41,265][03976] Updated weights for policy 0, policy_version 57705 (0.0009) -[2024-07-05 14:55:43,512][03976] Updated weights for policy 0, policy_version 57715 (0.0011) -[2024-07-05 14:55:45,772][03976] Updated weights for policy 0, policy_version 57725 (0.0012) -[2024-07-05 14:55:46,168][03423] Fps is (10 sec: 36045.0, 60 sec: 35908.3, 300 sec: 33828.1). Total num frames: 452886528. Throughput: 0: 9126.3. Samples: 716240. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:46,169][03423] Avg episode reward: [(0, '53.182')] -[2024-07-05 14:55:48,014][03976] Updated weights for policy 0, policy_version 57735 (0.0009) -[2024-07-05 14:55:50,137][03976] Updated weights for policy 0, policy_version 57745 (0.0010) -[2024-07-05 14:55:51,176][03423] Fps is (10 sec: 36840.1, 60 sec: 36181.8, 300 sec: 34039.8). Total num frames: 453074944. Throughput: 0: 9143.6. Samples: 743532. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:51,178][03423] Avg episode reward: [(0, '51.911')] -[2024-07-05 14:55:52,389][03976] Updated weights for policy 0, policy_version 57755 (0.0009) -[2024-07-05 14:55:54,771][03976] Updated weights for policy 0, policy_version 57765 (0.0010) -[2024-07-05 14:55:56,168][03423] Fps is (10 sec: 36864.0, 60 sec: 36318.0, 300 sec: 34147.7). Total num frames: 453255168. Throughput: 0: 9120.9. Samples: 797404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:55:56,168][03423] Avg episode reward: [(0, '52.203')] -[2024-07-05 14:55:57,057][03976] Updated weights for policy 0, policy_version 57775 (0.0010) -[2024-07-05 14:55:59,417][03976] Updated weights for policy 0, policy_version 57785 (0.0015) -[2024-07-05 14:56:01,168][03423] Fps is (10 sec: 35248.9, 60 sec: 36317.9, 300 sec: 34160.6). Total num frames: 453427200. Throughput: 0: 9132.7. Samples: 850508. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:56:01,168][03423] Avg episode reward: [(0, '54.525')] -[2024-07-05 14:56:01,742][03976] Updated weights for policy 0, policy_version 57795 (0.0013) -[2024-07-05 14:56:03,943][03976] Updated weights for policy 0, policy_version 57805 (0.0010) -[2024-07-05 14:56:06,168][03423] Fps is (10 sec: 35225.4, 60 sec: 36317.9, 300 sec: 34250.3). Total num frames: 453607424. Throughput: 0: 9116.7. Samples: 877780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:56:06,180][03423] Avg episode reward: [(0, '54.195')] -[2024-07-05 14:56:06,228][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057815_453615616.pth... -[2024-07-05 14:56:06,227][03976] Updated weights for policy 0, policy_version 57815 (0.0009) -[2024-07-05 14:56:06,308][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057046_447315968.pth -[2024-07-05 14:56:08,439][03976] Updated weights for policy 0, policy_version 57825 (0.0009) -[2024-07-05 14:56:10,609][03976] Updated weights for policy 0, policy_version 57835 (0.0009) -[2024-07-05 14:56:11,168][03423] Fps is (10 sec: 36863.7, 60 sec: 36454.3, 300 sec: 34406.4). Total num frames: 453795840. Throughput: 0: 9100.3. Samples: 932984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:56:11,169][03423] Avg episode reward: [(0, '54.645')] -[2024-07-05 14:56:12,772][03976] Updated weights for policy 0, policy_version 57845 (0.0008) -[2024-07-05 14:56:14,977][03976] Updated weights for policy 0, policy_version 57855 (0.0010) -[2024-07-05 14:56:16,168][03423] Fps is (10 sec: 37683.2, 60 sec: 36591.0, 300 sec: 34548.9). Total num frames: 453984256. Throughput: 0: 9146.3. Samples: 989580. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:16,169][03423] Avg episode reward: [(0, '53.463')] -[2024-07-05 14:56:17,153][03976] Updated weights for policy 0, policy_version 57865 (0.0009) -[2024-07-05 14:56:19,265][03976] Updated weights for policy 0, policy_version 57875 (0.0010) -[2024-07-05 14:56:21,168][03423] Fps is (10 sec: 37683.4, 60 sec: 36590.9, 300 sec: 34679.5). Total num frames: 454172672. Throughput: 0: 9168.9. Samples: 1017888. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:21,169][03423] Avg episode reward: [(0, '55.815')] -[2024-07-05 14:56:21,489][03976] Updated weights for policy 0, policy_version 57885 (0.0010) -[2024-07-05 14:56:23,947][03976] Updated weights for policy 0, policy_version 57895 (0.0011) -[2024-07-05 14:56:26,167][03423] Fps is (10 sec: 36045.2, 60 sec: 36454.5, 300 sec: 34668.6). Total num frames: 454344704. Throughput: 0: 9122.2. Samples: 1071780. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:26,168][03423] Avg episode reward: [(0, '55.978')] -[2024-07-05 14:56:26,322][03976] Updated weights for policy 0, policy_version 57905 (0.0016) -[2024-07-05 14:56:28,493][03976] Updated weights for policy 0, policy_version 57915 (0.0009) -[2024-07-05 14:56:30,637][03976] Updated weights for policy 0, policy_version 57925 (0.0009) -[2024-07-05 14:56:31,168][03423] Fps is (10 sec: 36044.9, 60 sec: 36591.0, 300 sec: 34784.5). Total num frames: 454533120. Throughput: 0: 9113.6. Samples: 1126352. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:31,169][03423] Avg episode reward: [(0, '55.718')] -[2024-07-05 14:56:32,862][03976] Updated weights for policy 0, policy_version 57935 (0.0009) -[2024-07-05 14:56:35,051][03976] Updated weights for policy 0, policy_version 57945 (0.0010) -[2024-07-05 14:56:36,168][03423] Fps is (10 sec: 37682.3, 60 sec: 36590.9, 300 sec: 34891.8). Total num frames: 454721536. Throughput: 0: 9110.2. Samples: 1153432. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:36,169][03423] Avg episode reward: [(0, '55.003')] -[2024-07-05 14:56:37,255][03976] Updated weights for policy 0, policy_version 57955 (0.0009) -[2024-07-05 14:56:39,462][03976] Updated weights for policy 0, policy_version 57965 (0.0011) -[2024-07-05 14:56:41,167][03423] Fps is (10 sec: 36864.2, 60 sec: 36591.0, 300 sec: 34933.0). Total num frames: 454901760. Throughput: 0: 9174.7. Samples: 1210264. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:41,168][03423] Avg episode reward: [(0, '54.972')] -[2024-07-05 14:56:41,675][03976] Updated weights for policy 0, policy_version 57975 (0.0010) -[2024-07-05 14:56:43,801][03976] Updated weights for policy 0, policy_version 57985 (0.0009) -[2024-07-05 14:56:46,016][03976] Updated weights for policy 0, policy_version 57995 (0.0013) -[2024-07-05 14:56:46,168][03423] Fps is (10 sec: 36864.5, 60 sec: 36727.4, 300 sec: 35027.9). Total num frames: 455090176. Throughput: 0: 9237.5. Samples: 1266196. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:56:46,169][03423] Avg episode reward: [(0, '54.300')] -[2024-07-05 14:56:48,206][03976] Updated weights for policy 0, policy_version 58005 (0.0009) -[2024-07-05 14:56:50,394][03976] Updated weights for policy 0, policy_version 58015 (0.0009) -[2024-07-05 14:56:51,167][03423] Fps is (10 sec: 37683.2, 60 sec: 36731.5, 300 sec: 35116.4). Total num frames: 455278592. Throughput: 0: 9261.3. Samples: 1294536. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:56:51,168][03423] Avg episode reward: [(0, '54.951')] -[2024-07-05 14:56:52,583][03976] Updated weights for policy 0, policy_version 58025 (0.0009) -[2024-07-05 14:56:54,766][03976] Updated weights for policy 0, policy_version 58035 (0.0009) -[2024-07-05 14:56:56,175][03423] Fps is (10 sec: 37660.1, 60 sec: 36860.2, 300 sec: 35197.8). Total num frames: 455467008. Throughput: 0: 9280.7. Samples: 1350672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:56:56,176][03423] Avg episode reward: [(0, '54.598')] -[2024-07-05 14:56:56,934][03976] Updated weights for policy 0, policy_version 58045 (0.0009) -[2024-07-05 14:56:59,083][03976] Updated weights for policy 0, policy_version 58055 (0.0009) -[2024-07-05 14:57:01,167][03423] Fps is (10 sec: 37683.2, 60 sec: 37137.1, 300 sec: 35276.8). Total num frames: 455655424. Throughput: 0: 9287.8. Samples: 1407532. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:01,180][03423] Avg episode reward: [(0, '53.331')] -[2024-07-05 14:57:01,233][03976] Updated weights for policy 0, policy_version 58065 (0.0009) -[2024-07-05 14:57:03,436][03976] Updated weights for policy 0, policy_version 58075 (0.0009) -[2024-07-05 14:57:05,750][03976] Updated weights for policy 0, policy_version 58085 (0.0012) -[2024-07-05 14:57:06,179][03423] Fps is (10 sec: 36848.3, 60 sec: 37130.6, 300 sec: 35297.8). Total num frames: 455835648. Throughput: 0: 9280.5. Samples: 1435608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:06,181][03423] Avg episode reward: [(0, '54.406')] -[2024-07-05 14:57:08,183][03976] Updated weights for policy 0, policy_version 58095 (0.0010) -[2024-07-05 14:57:10,365][03976] Updated weights for policy 0, policy_version 58105 (0.0009) -[2024-07-05 14:57:11,168][03423] Fps is (10 sec: 36044.8, 60 sec: 37000.6, 300 sec: 35322.0). Total num frames: 456015872. Throughput: 0: 9255.9. Samples: 1488296. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:11,169][03423] Avg episode reward: [(0, '54.517')] -[2024-07-05 14:57:12,548][03976] Updated weights for policy 0, policy_version 58115 (0.0010) -[2024-07-05 14:57:14,716][03976] Updated weights for policy 0, policy_version 58125 (0.0009) -[2024-07-05 14:57:16,177][03423] Fps is (10 sec: 36875.7, 60 sec: 36996.1, 300 sec: 35388.0). Total num frames: 456204288. Throughput: 0: 9293.2. Samples: 1544612. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:16,180][03423] Avg episode reward: [(0, '55.071')] -[2024-07-05 14:57:16,890][03976] Updated weights for policy 0, policy_version 58135 (0.0009) -[2024-07-05 14:57:19,069][03976] Updated weights for policy 0, policy_version 58145 (0.0009) -[2024-07-05 14:57:21,167][03423] Fps is (10 sec: 37683.3, 60 sec: 37000.6, 300 sec: 35453.2). Total num frames: 456392704. Throughput: 0: 9323.1. Samples: 1572968. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:21,168][03423] Avg episode reward: [(0, '54.803')] -[2024-07-05 14:57:21,253][03976] Updated weights for policy 0, policy_version 58155 (0.0012) -[2024-07-05 14:57:23,394][03976] Updated weights for policy 0, policy_version 58165 (0.0008) -[2024-07-05 14:57:25,779][03976] Updated weights for policy 0, policy_version 58175 (0.0010) -[2024-07-05 14:57:26,168][03423] Fps is (10 sec: 36890.7, 60 sec: 37137.0, 300 sec: 35469.1). Total num frames: 456572928. Throughput: 0: 9316.4. Samples: 1629504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:26,169][03423] Avg episode reward: [(0, '53.444')] -[2024-07-05 14:57:28,280][03976] Updated weights for policy 0, policy_version 58185 (0.0010) -[2024-07-05 14:57:30,839][03976] Updated weights for policy 0, policy_version 58195 (0.0011) -[2024-07-05 14:57:31,168][03423] Fps is (10 sec: 34406.2, 60 sec: 36727.5, 300 sec: 35398.1). Total num frames: 456736768. Throughput: 0: 9145.1. Samples: 1677724. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:31,169][03423] Avg episode reward: [(0, '55.069')] -[2024-07-05 14:57:33,358][03976] Updated weights for policy 0, policy_version 58205 (0.0011) -[2024-07-05 14:57:35,804][03976] Updated weights for policy 0, policy_version 58215 (0.0010) -[2024-07-05 14:57:36,168][03423] Fps is (10 sec: 32767.4, 60 sec: 36317.8, 300 sec: 35330.6). Total num frames: 456900608. Throughput: 0: 9062.8. Samples: 1702364. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:36,169][03423] Avg episode reward: [(0, '53.463')] -[2024-07-05 14:57:38,018][03976] Updated weights for policy 0, policy_version 58225 (0.0010) -[2024-07-05 14:57:40,276][03976] Updated weights for policy 0, policy_version 58235 (0.0010) -[2024-07-05 14:57:41,168][03423] Fps is (10 sec: 35225.6, 60 sec: 36454.4, 300 sec: 35389.4). Total num frames: 457089024. Throughput: 0: 9002.9. Samples: 1755748. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:41,169][03423] Avg episode reward: [(0, '55.821')] -[2024-07-05 14:57:42,530][03976] Updated weights for policy 0, policy_version 58245 (0.0011) -[2024-07-05 14:57:44,795][03976] Updated weights for policy 0, policy_version 58255 (0.0015) -[2024-07-05 14:57:46,168][03423] Fps is (10 sec: 36045.3, 60 sec: 36181.3, 300 sec: 35365.4). Total num frames: 457261056. Throughput: 0: 8939.4. Samples: 1809804. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:46,169][03423] Avg episode reward: [(0, '54.269')] -[2024-07-05 14:57:47,175][03976] Updated weights for policy 0, policy_version 58265 (0.0010) -[2024-07-05 14:57:49,546][03976] Updated weights for policy 0, policy_version 58275 (0.0010) -[2024-07-05 14:57:51,168][03423] Fps is (10 sec: 35225.5, 60 sec: 36044.8, 300 sec: 35381.6). Total num frames: 457441280. Throughput: 0: 8893.3. Samples: 1835712. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:51,169][03423] Avg episode reward: [(0, '54.371')] -[2024-07-05 14:57:51,792][03976] Updated weights for policy 0, policy_version 58285 (0.0013) -[2024-07-05 14:57:54,108][03976] Updated weights for policy 0, policy_version 58295 (0.0010) -[2024-07-05 14:57:56,168][03423] Fps is (10 sec: 35225.9, 60 sec: 35775.4, 300 sec: 35359.0). Total num frames: 457613312. Throughput: 0: 8896.9. Samples: 1888656. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 14:57:56,169][03423] Avg episode reward: [(0, '57.122')] -[2024-07-05 14:57:56,548][03976] Updated weights for policy 0, policy_version 58305 (0.0010) -[2024-07-05 14:57:58,961][03976] Updated weights for policy 0, policy_version 58315 (0.0011) -[2024-07-05 14:58:01,168][03423] Fps is (10 sec: 34406.5, 60 sec: 35498.7, 300 sec: 35337.3). Total num frames: 457785344. Throughput: 0: 8768.4. Samples: 1939128. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:01,169][03423] Avg episode reward: [(0, '54.177')] -[2024-07-05 14:58:01,367][03976] Updated weights for policy 0, policy_version 58325 (0.0010) -[2024-07-05 14:58:03,696][03976] Updated weights for policy 0, policy_version 58335 (0.0010) -[2024-07-05 14:58:05,934][03976] Updated weights for policy 0, policy_version 58345 (0.0010) -[2024-07-05 14:58:06,168][03423] Fps is (10 sec: 34406.3, 60 sec: 35368.3, 300 sec: 35316.6). Total num frames: 457957376. Throughput: 0: 8729.8. Samples: 1965812. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:06,169][03423] Avg episode reward: [(0, '53.883')] -[2024-07-05 14:58:06,213][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000058346_457965568.pth... -[2024-07-05 14:58:06,298][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057375_450011136.pth -[2024-07-05 14:58:08,307][03976] Updated weights for policy 0, policy_version 58355 (0.0009) -[2024-07-05 14:58:10,628][03976] Updated weights for policy 0, policy_version 58365 (0.0010) -[2024-07-05 14:58:11,177][03423] Fps is (10 sec: 35194.7, 60 sec: 35356.9, 300 sec: 35331.1). Total num frames: 458137600. Throughput: 0: 8660.4. Samples: 2019296. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:11,179][03423] Avg episode reward: [(0, '54.865')] -[2024-07-05 14:58:12,985][03976] Updated weights for policy 0, policy_version 58375 (0.0010) -[2024-07-05 14:58:15,266][03976] Updated weights for policy 0, policy_version 58385 (0.0016) -[2024-07-05 14:58:16,168][03423] Fps is (10 sec: 36045.0, 60 sec: 35229.9, 300 sec: 35347.6). Total num frames: 458317824. Throughput: 0: 8772.3. Samples: 2072476. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:16,169][03423] Avg episode reward: [(0, '55.044')] -[2024-07-05 14:58:17,494][03976] Updated weights for policy 0, policy_version 58395 (0.0010) -[2024-07-05 14:58:19,791][03976] Updated weights for policy 0, policy_version 58405 (0.0010) -[2024-07-05 14:58:21,168][03423] Fps is (10 sec: 36076.5, 60 sec: 35089.0, 300 sec: 35362.1). Total num frames: 458498048. Throughput: 0: 8825.2. Samples: 2099496. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:21,169][03423] Avg episode reward: [(0, '57.133')] -[2024-07-05 14:58:22,026][03976] Updated weights for policy 0, policy_version 58415 (0.0009) -[2024-07-05 14:58:24,303][03976] Updated weights for policy 0, policy_version 58425 (0.0010) -[2024-07-05 14:58:26,168][03423] Fps is (10 sec: 36044.9, 60 sec: 35089.1, 300 sec: 35376.1). Total num frames: 458678272. Throughput: 0: 8823.5. Samples: 2152804. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:26,169][03423] Avg episode reward: [(0, '55.695')] -[2024-07-05 14:58:26,604][03976] Updated weights for policy 0, policy_version 58435 (0.0015) -[2024-07-05 14:58:28,919][03976] Updated weights for policy 0, policy_version 58445 (0.0009) -[2024-07-05 14:58:31,167][03423] Fps is (10 sec: 35225.8, 60 sec: 35225.6, 300 sec: 35356.7). Total num frames: 458850304. Throughput: 0: 8824.8. Samples: 2206920. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:31,168][03423] Avg episode reward: [(0, '55.300')] -[2024-07-05 14:58:31,189][03976] Updated weights for policy 0, policy_version 58455 (0.0009) -[2024-07-05 14:58:33,460][03976] Updated weights for policy 0, policy_version 58465 (0.0009) -[2024-07-05 14:58:35,744][03976] Updated weights for policy 0, policy_version 58475 (0.0011) -[2024-07-05 14:58:36,168][03423] Fps is (10 sec: 35225.4, 60 sec: 35498.8, 300 sec: 35370.2). Total num frames: 459030528. Throughput: 0: 8850.7. Samples: 2233992. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 14:58:36,169][03423] Avg episode reward: [(0, '53.255')] -[2024-07-05 14:58:38,026][03976] Updated weights for policy 0, policy_version 58485 (0.0019) -[2024-07-05 14:58:39,849][03976] Updated weights for policy 0, policy_version 58495 (0.0010) -[2024-07-05 14:58:41,167][03423] Fps is (10 sec: 39321.4, 60 sec: 35908.3, 300 sec: 35509.2). Total num frames: 459243520. Throughput: 0: 8935.0. Samples: 2290732. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:58:41,168][03423] Avg episode reward: [(0, '53.908')] -[2024-07-05 14:58:41,542][03976] Updated weights for policy 0, policy_version 58505 (0.0008) -[2024-07-05 14:58:43,270][03976] Updated weights for policy 0, policy_version 58515 (0.0008) -[2024-07-05 14:58:44,983][03976] Updated weights for policy 0, policy_version 58525 (0.0011) -[2024-07-05 14:58:46,167][03423] Fps is (10 sec: 45056.3, 60 sec: 37000.6, 300 sec: 35735.7). Total num frames: 459481088. Throughput: 0: 9387.2. Samples: 2361552. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:58:46,168][03423] Avg episode reward: [(0, '54.732')] -[2024-07-05 14:58:46,709][03976] Updated weights for policy 0, policy_version 58535 (0.0012) -[2024-07-05 14:58:48,460][03976] Updated weights for policy 0, policy_version 58545 (0.0009) -[2024-07-05 14:58:50,220][03976] Updated weights for policy 0, policy_version 58555 (0.0010) -[2024-07-05 14:58:51,168][03423] Fps is (10 sec: 47513.2, 60 sec: 37956.2, 300 sec: 35953.8). Total num frames: 459718656. Throughput: 0: 9591.3. Samples: 2397420. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:58:51,169][03423] Avg episode reward: [(0, '54.748')] -[2024-07-05 14:58:51,955][03976] Updated weights for policy 0, policy_version 58565 (0.0008) -[2024-07-05 14:58:53,644][03976] Updated weights for policy 0, policy_version 58575 (0.0008) -[2024-07-05 14:58:55,386][03976] Updated weights for policy 0, policy_version 58585 (0.0008) -[2024-07-05 14:58:56,167][03423] Fps is (10 sec: 47513.6, 60 sec: 39048.6, 300 sec: 36164.0). Total num frames: 459956224. Throughput: 0: 9985.3. Samples: 2468548. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:58:56,169][03423] Avg episode reward: [(0, '54.516')] -[2024-07-05 14:58:57,079][03976] Updated weights for policy 0, policy_version 58595 (0.0012) -[2024-07-05 14:58:58,827][03976] Updated weights for policy 0, policy_version 58605 (0.0008) -[2024-07-05 14:59:00,532][03976] Updated weights for policy 0, policy_version 58615 (0.0008) -[2024-07-05 14:59:01,168][03423] Fps is (10 sec: 47513.9, 60 sec: 40140.8, 300 sec: 36366.6). Total num frames: 460193792. Throughput: 0: 10382.6. Samples: 2539692. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:59:01,169][03423] Avg episode reward: [(0, '52.939')] -[2024-07-05 14:59:02,231][03976] Updated weights for policy 0, policy_version 58625 (0.0011) -[2024-07-05 14:59:03,954][03976] Updated weights for policy 0, policy_version 58635 (0.0008) -[2024-07-05 14:59:05,684][03976] Updated weights for policy 0, policy_version 58645 (0.0014) -[2024-07-05 14:59:06,168][03423] Fps is (10 sec: 47513.5, 60 sec: 41233.1, 300 sec: 36562.2). Total num frames: 460431360. Throughput: 0: 10571.2. Samples: 2575200. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:59:06,169][03423] Avg episode reward: [(0, '54.407')] -[2024-07-05 14:59:07,403][03976] Updated weights for policy 0, policy_version 58655 (0.0008) -[2024-07-05 14:59:09,189][03976] Updated weights for policy 0, policy_version 58665 (0.0009) -[2024-07-05 14:59:10,936][03976] Updated weights for policy 0, policy_version 58675 (0.0009) -[2024-07-05 14:59:11,168][03423] Fps is (10 sec: 47513.6, 60 sec: 42195.0, 300 sec: 36751.0). Total num frames: 460668928. Throughput: 0: 10967.6. Samples: 2646344. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 14:59:11,169][03423] Avg episode reward: [(0, '53.556')] -[2024-07-05 14:59:12,632][03976] Updated weights for policy 0, policy_version 58685 (0.0008) -[2024-07-05 14:59:14,400][03976] Updated weights for policy 0, policy_version 58695 (0.0008) -[2024-07-05 14:59:16,108][03976] Updated weights for policy 0, policy_version 58705 (0.0011) -[2024-07-05 14:59:16,168][03423] Fps is (10 sec: 47513.0, 60 sec: 43144.5, 300 sec: 36933.4). Total num frames: 460906496. Throughput: 0: 11349.7. Samples: 2717660. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:16,169][03423] Avg episode reward: [(0, '54.244')] -[2024-07-05 14:59:17,823][03976] Updated weights for policy 0, policy_version 58715 (0.0008) -[2024-07-05 14:59:19,550][03976] Updated weights for policy 0, policy_version 58725 (0.0008) -[2024-07-05 14:59:21,167][03423] Fps is (10 sec: 47514.0, 60 sec: 44100.3, 300 sec: 37711.0). Total num frames: 461144064. Throughput: 0: 11533.8. Samples: 2753012. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:21,168][03423] Avg episode reward: [(0, '56.034')] -[2024-07-05 14:59:21,282][03976] Updated weights for policy 0, policy_version 58735 (0.0008) -[2024-07-05 14:59:23,025][03976] Updated weights for policy 0, policy_version 58745 (0.0012) -[2024-07-05 14:59:24,722][03976] Updated weights for policy 0, policy_version 58755 (0.0009) -[2024-07-05 14:59:26,168][03423] Fps is (10 sec: 47513.9, 60 sec: 45056.0, 300 sec: 37960.9). Total num frames: 461381632. Throughput: 0: 11854.0. Samples: 2824164. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:26,169][03423] Avg episode reward: [(0, '52.873')] -[2024-07-05 14:59:26,456][03976] Updated weights for policy 0, policy_version 58765 (0.0008) -[2024-07-05 14:59:28,173][03976] Updated weights for policy 0, policy_version 58775 (0.0008) -[2024-07-05 14:59:29,920][03976] Updated weights for policy 0, policy_version 58785 (0.0008) -[2024-07-05 14:59:31,167][03423] Fps is (10 sec: 47513.5, 60 sec: 46148.3, 300 sec: 38183.1). Total num frames: 461619200. Throughput: 0: 11859.2. Samples: 2895216. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:31,169][03423] Avg episode reward: [(0, '53.722')] -[2024-07-05 14:59:31,688][03976] Updated weights for policy 0, policy_version 58795 (0.0008) -[2024-07-05 14:59:33,427][03976] Updated weights for policy 0, policy_version 58805 (0.0008) -[2024-07-05 14:59:35,153][03976] Updated weights for policy 0, policy_version 58815 (0.0008) -[2024-07-05 14:59:36,176][03423] Fps is (10 sec: 47514.1, 60 sec: 47104.1, 300 sec: 38377.9). Total num frames: 461856768. Throughput: 0: 11848.6. Samples: 2930604. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:36,177][03423] Avg episode reward: [(0, '51.851')] -[2024-07-05 14:59:36,823][03976] Updated weights for policy 0, policy_version 58825 (0.0008) -[2024-07-05 14:59:38,595][03976] Updated weights for policy 0, policy_version 58835 (0.0007) -[2024-07-05 14:59:40,345][03976] Updated weights for policy 0, policy_version 58845 (0.0008) -[2024-07-05 14:59:41,167][03423] Fps is (10 sec: 46695.0, 60 sec: 47377.2, 300 sec: 38488.5). Total num frames: 462086144. Throughput: 0: 11840.8. Samples: 3001380. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:41,168][03423] Avg episode reward: [(0, '54.314')] -[2024-07-05 14:59:42,066][03976] Updated weights for policy 0, policy_version 58855 (0.0008) -[2024-07-05 14:59:43,781][03976] Updated weights for policy 0, policy_version 58865 (0.0009) -[2024-07-05 14:59:45,517][03976] Updated weights for policy 0, policy_version 58875 (0.0008) -[2024-07-05 14:59:46,168][03423] Fps is (10 sec: 46693.7, 60 sec: 47377.0, 300 sec: 38711.6). Total num frames: 462323712. Throughput: 0: 11837.8. Samples: 3072392. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 14:59:46,169][03423] Avg episode reward: [(0, '52.513')] -[2024-07-05 14:59:47,265][03976] Updated weights for policy 0, policy_version 58885 (0.0008) -[2024-07-05 14:59:48,986][03976] Updated weights for policy 0, policy_version 58895 (0.0010) -[2024-07-05 14:59:50,694][03976] Updated weights for policy 0, policy_version 58905 (0.0008) -[2024-07-05 14:59:51,168][03423] Fps is (10 sec: 47512.8, 60 sec: 47377.1, 300 sec: 38932.9). Total num frames: 462561280. Throughput: 0: 11838.9. Samples: 3107952. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:59:51,168][03423] Avg episode reward: [(0, '55.318')] -[2024-07-05 14:59:52,423][03976] Updated weights for policy 0, policy_version 58915 (0.0008) -[2024-07-05 14:59:54,154][03976] Updated weights for policy 0, policy_version 58925 (0.0011) -[2024-07-05 14:59:55,968][03976] Updated weights for policy 0, policy_version 58935 (0.0009) -[2024-07-05 14:59:56,168][03423] Fps is (10 sec: 46694.8, 60 sec: 47240.5, 300 sec: 39127.2). Total num frames: 462790656. Throughput: 0: 11827.8. Samples: 3178596. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 14:59:56,168][03423] Avg episode reward: [(0, '53.648')] -[2024-07-05 14:59:57,770][03976] Updated weights for policy 0, policy_version 58945 (0.0013) -[2024-07-05 14:59:59,669][03976] Updated weights for policy 0, policy_version 58955 (0.0011) -[2024-07-05 15:00:01,168][03423] Fps is (10 sec: 45874.4, 60 sec: 47103.9, 300 sec: 39293.8). Total num frames: 463020032. Throughput: 0: 11748.9. Samples: 3246360. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:01,169][03423] Avg episode reward: [(0, '55.904')] -[2024-07-05 15:00:01,430][03976] Updated weights for policy 0, policy_version 58965 (0.0011) -[2024-07-05 15:00:03,202][03976] Updated weights for policy 0, policy_version 58975 (0.0008) -[2024-07-05 15:00:04,984][03976] Updated weights for policy 0, policy_version 58985 (0.0008) -[2024-07-05 15:00:06,167][03423] Fps is (10 sec: 45875.6, 60 sec: 46967.5, 300 sec: 39460.4). Total num frames: 463249408. Throughput: 0: 11736.4. Samples: 3281148. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:06,168][03423] Avg episode reward: [(0, '52.742')] -[2024-07-05 15:00:06,186][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000058992_463257600.pth... -[2024-07-05 15:00:06,257][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000057815_453615616.pth -[2024-07-05 15:00:06,745][03976] Updated weights for policy 0, policy_version 58995 (0.0010) -[2024-07-05 15:00:08,479][03976] Updated weights for policy 0, policy_version 59005 (0.0010) -[2024-07-05 15:00:10,207][03976] Updated weights for policy 0, policy_version 59015 (0.0008) -[2024-07-05 15:00:11,167][03423] Fps is (10 sec: 46695.3, 60 sec: 46967.5, 300 sec: 39654.9). Total num frames: 463486976. Throughput: 0: 11711.5. Samples: 3351180. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:11,168][03423] Avg episode reward: [(0, '52.301')] -[2024-07-05 15:00:11,944][03976] Updated weights for policy 0, policy_version 59025 (0.0008) -[2024-07-05 15:00:13,678][03976] Updated weights for policy 0, policy_version 59035 (0.0008) -[2024-07-05 15:00:15,435][03976] Updated weights for policy 0, policy_version 59045 (0.0007) -[2024-07-05 15:00:16,168][03423] Fps is (10 sec: 47513.2, 60 sec: 46967.5, 300 sec: 39821.4). Total num frames: 463724544. Throughput: 0: 11701.3. Samples: 3421776. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:16,168][03423] Avg episode reward: [(0, '55.758')] -[2024-07-05 15:00:17,135][03976] Updated weights for policy 0, policy_version 59055 (0.0011) -[2024-07-05 15:00:18,856][03976] Updated weights for policy 0, policy_version 59065 (0.0008) -[2024-07-05 15:00:20,604][03976] Updated weights for policy 0, policy_version 59075 (0.0008) -[2024-07-05 15:00:21,167][03423] Fps is (10 sec: 46694.6, 60 sec: 46830.9, 300 sec: 39988.1). Total num frames: 463953920. Throughput: 0: 11713.9. Samples: 3457728. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:21,168][03423] Avg episode reward: [(0, '53.018')] -[2024-07-05 15:00:22,321][03976] Updated weights for policy 0, policy_version 59085 (0.0008) -[2024-07-05 15:00:24,128][03976] Updated weights for policy 0, policy_version 59095 (0.0008) -[2024-07-05 15:00:25,894][03976] Updated weights for policy 0, policy_version 59105 (0.0010) -[2024-07-05 15:00:26,168][03423] Fps is (10 sec: 46694.5, 60 sec: 46831.0, 300 sec: 40182.5). Total num frames: 464191488. Throughput: 0: 11699.8. Samples: 3527872. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:26,169][03423] Avg episode reward: [(0, '53.065')] -[2024-07-05 15:00:27,666][03976] Updated weights for policy 0, policy_version 59115 (0.0008) -[2024-07-05 15:00:29,414][03976] Updated weights for policy 0, policy_version 59125 (0.0010) -[2024-07-05 15:00:31,154][03976] Updated weights for policy 0, policy_version 59135 (0.0008) -[2024-07-05 15:00:31,167][03423] Fps is (10 sec: 47513.4, 60 sec: 46830.9, 300 sec: 40349.1). Total num frames: 464429056. Throughput: 0: 11674.3. Samples: 3597732. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:31,168][03423] Avg episode reward: [(0, '54.625')] -[2024-07-05 15:00:32,895][03976] Updated weights for policy 0, policy_version 59145 (0.0008) -[2024-07-05 15:00:34,629][03976] Updated weights for policy 0, policy_version 59155 (0.0008) -[2024-07-05 15:00:36,168][03423] Fps is (10 sec: 46694.4, 60 sec: 46694.4, 300 sec: 40515.7). Total num frames: 464658432. Throughput: 0: 11662.0. Samples: 3632740. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:36,168][03423] Avg episode reward: [(0, '53.949')] -[2024-07-05 15:00:36,410][03976] Updated weights for policy 0, policy_version 59165 (0.0012) -[2024-07-05 15:00:38,135][03976] Updated weights for policy 0, policy_version 59175 (0.0011) -[2024-07-05 15:00:39,872][03976] Updated weights for policy 0, policy_version 59185 (0.0008) -[2024-07-05 15:00:41,168][03423] Fps is (10 sec: 46694.3, 60 sec: 46830.8, 300 sec: 40710.1). Total num frames: 464896000. Throughput: 0: 11656.7. Samples: 3703148. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:41,169][03423] Avg episode reward: [(0, '55.156')] -[2024-07-05 15:00:41,647][03976] Updated weights for policy 0, policy_version 59195 (0.0008) -[2024-07-05 15:00:43,382][03976] Updated weights for policy 0, policy_version 59205 (0.0011) -[2024-07-05 15:00:45,093][03976] Updated weights for policy 0, policy_version 59215 (0.0007) -[2024-07-05 15:00:46,168][03423] Fps is (10 sec: 47513.0, 60 sec: 46830.9, 300 sec: 40877.6). Total num frames: 465133568. Throughput: 0: 11719.6. Samples: 3773740. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:46,169][03423] Avg episode reward: [(0, '55.029')] -[2024-07-05 15:00:46,803][03976] Updated weights for policy 0, policy_version 59225 (0.0008) -[2024-07-05 15:00:48,581][03976] Updated weights for policy 0, policy_version 59235 (0.0008) -[2024-07-05 15:00:50,239][03976] Updated weights for policy 0, policy_version 59245 (0.0008) -[2024-07-05 15:00:51,168][03423] Fps is (10 sec: 47513.3, 60 sec: 46830.9, 300 sec: 41071.1). Total num frames: 465371136. Throughput: 0: 11732.5. Samples: 3809112. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:51,169][03423] Avg episode reward: [(0, '54.550')] -[2024-07-05 15:00:51,952][03976] Updated weights for policy 0, policy_version 59255 (0.0012) -[2024-07-05 15:00:53,747][03976] Updated weights for policy 0, policy_version 59265 (0.0012) -[2024-07-05 15:00:55,501][03976] Updated weights for policy 0, policy_version 59275 (0.0011) -[2024-07-05 15:00:56,168][03423] Fps is (10 sec: 46694.2, 60 sec: 46830.8, 300 sec: 41265.4). Total num frames: 465600512. Throughput: 0: 11750.5. Samples: 3879956. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:00:56,169][03423] Avg episode reward: [(0, '54.584')] -[2024-07-05 15:00:57,256][03976] Updated weights for policy 0, policy_version 59285 (0.0010) -[2024-07-05 15:00:59,000][03976] Updated weights for policy 0, policy_version 59295 (0.0008) -[2024-07-05 15:01:00,799][03976] Updated weights for policy 0, policy_version 59305 (0.0008) -[2024-07-05 15:01:01,167][03423] Fps is (10 sec: 46694.8, 60 sec: 46967.6, 300 sec: 41459.9). Total num frames: 465838080. Throughput: 0: 11732.0. Samples: 3949716. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) -[2024-07-05 15:01:01,168][03423] Avg episode reward: [(0, '53.295')] -[2024-07-05 15:01:02,558][03976] Updated weights for policy 0, policy_version 59315 (0.0008) -[2024-07-05 15:01:04,324][03976] Updated weights for policy 0, policy_version 59325 (0.0008) -[2024-07-05 15:01:06,085][03976] Updated weights for policy 0, policy_version 59335 (0.0008) -[2024-07-05 15:01:06,168][03423] Fps is (10 sec: 46695.1, 60 sec: 46967.4, 300 sec: 41598.7). Total num frames: 466067456. Throughput: 0: 11712.2. Samples: 3984776. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:06,169][03423] Avg episode reward: [(0, '52.675')] -[2024-07-05 15:01:07,808][03976] Updated weights for policy 0, policy_version 59345 (0.0008) -[2024-07-05 15:01:09,584][03976] Updated weights for policy 0, policy_version 59355 (0.0008) -[2024-07-05 15:01:11,167][03423] Fps is (10 sec: 46694.8, 60 sec: 46967.5, 300 sec: 41765.3). Total num frames: 466305024. Throughput: 0: 11714.0. Samples: 4055000. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:11,169][03423] Avg episode reward: [(0, '53.435')] -[2024-07-05 15:01:11,265][03976] Updated weights for policy 0, policy_version 59365 (0.0008) -[2024-07-05 15:01:13,057][03976] Updated weights for policy 0, policy_version 59375 (0.0007) -[2024-07-05 15:01:14,849][03976] Updated weights for policy 0, policy_version 59385 (0.0008) -[2024-07-05 15:01:16,168][03423] Fps is (10 sec: 46694.1, 60 sec: 46830.9, 300 sec: 41904.2). Total num frames: 466534400. Throughput: 0: 11721.6. Samples: 4125204. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:16,169][03423] Avg episode reward: [(0, '52.197')] -[2024-07-05 15:01:16,548][03976] Updated weights for policy 0, policy_version 59395 (0.0008) -[2024-07-05 15:01:18,305][03976] Updated weights for policy 0, policy_version 59405 (0.0009) -[2024-07-05 15:01:20,067][03976] Updated weights for policy 0, policy_version 59415 (0.0009) -[2024-07-05 15:01:21,168][03423] Fps is (10 sec: 45874.8, 60 sec: 46830.9, 300 sec: 42098.5). Total num frames: 466763776. Throughput: 0: 11736.2. Samples: 4160868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:21,169][03423] Avg episode reward: [(0, '54.014')] -[2024-07-05 15:01:21,857][03976] Updated weights for policy 0, policy_version 59425 (0.0013) -[2024-07-05 15:01:23,583][03976] Updated weights for policy 0, policy_version 59435 (0.0010) -[2024-07-05 15:01:25,352][03976] Updated weights for policy 0, policy_version 59445 (0.0009) -[2024-07-05 15:01:26,167][03423] Fps is (10 sec: 46695.0, 60 sec: 46830.9, 300 sec: 42265.2). Total num frames: 467001344. Throughput: 0: 11709.3. Samples: 4230064. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:26,168][03423] Avg episode reward: [(0, '53.253')] -[2024-07-05 15:01:27,122][03976] Updated weights for policy 0, policy_version 59455 (0.0014) -[2024-07-05 15:01:28,809][03976] Updated weights for policy 0, policy_version 59465 (0.0007) -[2024-07-05 15:01:30,481][03976] Updated weights for policy 0, policy_version 59475 (0.0011) -[2024-07-05 15:01:31,167][03423] Fps is (10 sec: 48333.0, 60 sec: 46967.5, 300 sec: 42459.6). Total num frames: 467247104. Throughput: 0: 11731.9. Samples: 4301672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:31,169][03423] Avg episode reward: [(0, '53.312')] -[2024-07-05 15:01:32,174][03976] Updated weights for policy 0, policy_version 59485 (0.0008) -[2024-07-05 15:01:33,852][03976] Updated weights for policy 0, policy_version 59495 (0.0008) -[2024-07-05 15:01:35,528][03976] Updated weights for policy 0, policy_version 59505 (0.0008) -[2024-07-05 15:01:36,168][03423] Fps is (10 sec: 48332.5, 60 sec: 47104.0, 300 sec: 42653.9). Total num frames: 467484672. Throughput: 0: 11757.5. Samples: 4338200. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:01:36,169][03423] Avg episode reward: [(0, '54.207')] -[2024-07-05 15:01:37,213][03976] Updated weights for policy 0, policy_version 59515 (0.0012) -[2024-07-05 15:01:38,900][03976] Updated weights for policy 0, policy_version 59525 (0.0007) -[2024-07-05 15:01:40,583][03976] Updated weights for policy 0, policy_version 59535 (0.0007) -[2024-07-05 15:01:41,168][03423] Fps is (10 sec: 48332.6, 60 sec: 47240.5, 300 sec: 42848.3). Total num frames: 467730432. Throughput: 0: 11805.2. Samples: 4411188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:01:41,169][03423] Avg episode reward: [(0, '51.055')] -[2024-07-05 15:01:42,273][03976] Updated weights for policy 0, policy_version 59545 (0.0007) -[2024-07-05 15:01:43,942][03976] Updated weights for policy 0, policy_version 59555 (0.0008) -[2024-07-05 15:01:45,641][03976] Updated weights for policy 0, policy_version 59565 (0.0008) -[2024-07-05 15:01:46,168][03423] Fps is (10 sec: 49152.2, 60 sec: 47377.2, 300 sec: 43042.7). Total num frames: 467976192. Throughput: 0: 11871.1. Samples: 4483916. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:01:46,169][03423] Avg episode reward: [(0, '54.671')] -[2024-07-05 15:01:47,334][03976] Updated weights for policy 0, policy_version 59575 (0.0007) -[2024-07-05 15:01:49,027][03976] Updated weights for policy 0, policy_version 59585 (0.0008) -[2024-07-05 15:01:50,707][03976] Updated weights for policy 0, policy_version 59595 (0.0007) -[2024-07-05 15:01:51,167][03423] Fps is (10 sec: 48332.9, 60 sec: 47377.1, 300 sec: 43210.2). Total num frames: 468213760. Throughput: 0: 11900.4. Samples: 4520292. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:01:51,169][03423] Avg episode reward: [(0, '53.708')] -[2024-07-05 15:01:52,466][03976] Updated weights for policy 0, policy_version 59605 (0.0012) -[2024-07-05 15:01:54,135][03976] Updated weights for policy 0, policy_version 59615 (0.0008) -[2024-07-05 15:01:55,797][03976] Updated weights for policy 0, policy_version 59625 (0.0007) -[2024-07-05 15:01:56,168][03423] Fps is (10 sec: 48332.7, 60 sec: 47650.2, 300 sec: 43403.7). Total num frames: 468459520. Throughput: 0: 11957.0. Samples: 4593068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:01:56,169][03423] Avg episode reward: [(0, '54.609')] -[2024-07-05 15:01:57,488][03976] Updated weights for policy 0, policy_version 59635 (0.0010) -[2024-07-05 15:01:59,163][03976] Updated weights for policy 0, policy_version 59645 (0.0010) -[2024-07-05 15:02:00,844][03976] Updated weights for policy 0, policy_version 59655 (0.0008) -[2024-07-05 15:02:01,168][03423] Fps is (10 sec: 48332.7, 60 sec: 47650.1, 300 sec: 43599.7). Total num frames: 468697088. Throughput: 0: 12010.2. Samples: 4665660. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:02:01,168][03423] Avg episode reward: [(0, '55.321')] -[2024-07-05 15:02:02,544][03976] Updated weights for policy 0, policy_version 59665 (0.0008) -[2024-07-05 15:02:04,235][03976] Updated weights for policy 0, policy_version 59675 (0.0009) -[2024-07-05 15:02:05,913][03976] Updated weights for policy 0, policy_version 59685 (0.0008) -[2024-07-05 15:02:06,168][03423] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 43820.3). Total num frames: 468942848. Throughput: 0: 12030.3. Samples: 4702232. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:02:06,169][03423] Avg episode reward: [(0, '51.857')] -[2024-07-05 15:02:06,172][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000059686_468942848.pth... -[2024-07-05 15:02:06,244][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000058346_457965568.pth -[2024-07-05 15:02:07,602][03976] Updated weights for policy 0, policy_version 59695 (0.0009) -[2024-07-05 15:02:09,312][03976] Updated weights for policy 0, policy_version 59705 (0.0007) -[2024-07-05 15:02:10,994][03976] Updated weights for policy 0, policy_version 59715 (0.0007) -[2024-07-05 15:02:11,167][03423] Fps is (10 sec: 49152.1, 60 sec: 48059.7, 300 sec: 44015.7). Total num frames: 469188608. Throughput: 0: 12106.9. Samples: 4774876. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:02:11,168][03423] Avg episode reward: [(0, '54.271')] -[2024-07-05 15:02:12,696][03976] Updated weights for policy 0, policy_version 59725 (0.0007) -[2024-07-05 15:02:14,491][03976] Updated weights for policy 0, policy_version 59735 (0.0011) -[2024-07-05 15:02:16,168][03423] Fps is (10 sec: 47512.6, 60 sec: 48059.6, 300 sec: 44153.4). Total num frames: 469417984. Throughput: 0: 12092.5. Samples: 4845840. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:16,169][03423] Avg episode reward: [(0, '55.025')] -[2024-07-05 15:02:16,232][03976] Updated weights for policy 0, policy_version 59745 (0.0008) -[2024-07-05 15:02:18,050][03976] Updated weights for policy 0, policy_version 59755 (0.0008) -[2024-07-05 15:02:19,880][03976] Updated weights for policy 0, policy_version 59765 (0.0014) -[2024-07-05 15:02:21,168][03423] Fps is (10 sec: 45875.1, 60 sec: 48059.7, 300 sec: 44320.1). Total num frames: 469647360. Throughput: 0: 12050.4. Samples: 4880468. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:21,168][03423] Avg episode reward: [(0, '52.236')] -[2024-07-05 15:02:21,609][03976] Updated weights for policy 0, policy_version 59775 (0.0008) -[2024-07-05 15:02:23,339][03976] Updated weights for policy 0, policy_version 59785 (0.0007) -[2024-07-05 15:02:25,096][03976] Updated weights for policy 0, policy_version 59795 (0.0008) -[2024-07-05 15:02:26,168][03423] Fps is (10 sec: 46695.6, 60 sec: 48059.7, 300 sec: 44570.0). Total num frames: 469884928. Throughput: 0: 11976.0. Samples: 4950108. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:26,168][03423] Avg episode reward: [(0, '54.095')] -[2024-07-05 15:02:26,865][03976] Updated weights for policy 0, policy_version 59805 (0.0014) -[2024-07-05 15:02:28,618][03976] Updated weights for policy 0, policy_version 59815 (0.0008) -[2024-07-05 15:02:30,325][03976] Updated weights for policy 0, policy_version 59825 (0.0007) -[2024-07-05 15:02:31,167][03423] Fps is (10 sec: 46694.5, 60 sec: 47786.6, 300 sec: 44792.2). Total num frames: 470114304. Throughput: 0: 11904.4. Samples: 5019612. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:31,168][03423] Avg episode reward: [(0, '56.489')] -[2024-07-05 15:02:32,119][03976] Updated weights for policy 0, policy_version 59835 (0.0008) -[2024-07-05 15:02:33,918][03976] Updated weights for policy 0, policy_version 59845 (0.0011) -[2024-07-05 15:02:35,686][03976] Updated weights for policy 0, policy_version 59855 (0.0008) -[2024-07-05 15:02:36,168][03423] Fps is (10 sec: 45875.0, 60 sec: 47650.1, 300 sec: 44931.0). Total num frames: 470343680. Throughput: 0: 11869.9. Samples: 5054440. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:36,169][03423] Avg episode reward: [(0, '51.818')] -[2024-07-05 15:02:37,469][03976] Updated weights for policy 0, policy_version 59865 (0.0008) -[2024-07-05 15:02:39,277][03976] Updated weights for policy 0, policy_version 59875 (0.0012) -[2024-07-05 15:02:40,999][03976] Updated weights for policy 0, policy_version 59885 (0.0009) -[2024-07-05 15:02:41,167][03423] Fps is (10 sec: 45875.2, 60 sec: 47377.1, 300 sec: 45125.4). Total num frames: 470573056. Throughput: 0: 11785.1. Samples: 5123396. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:41,168][03423] Avg episode reward: [(0, '51.753')] -[2024-07-05 15:02:42,748][03976] Updated weights for policy 0, policy_version 59895 (0.0010) -[2024-07-05 15:02:44,419][03976] Updated weights for policy 0, policy_version 59905 (0.0007) -[2024-07-05 15:02:46,168][03423] Fps is (10 sec: 46694.1, 60 sec: 47240.5, 300 sec: 45319.8). Total num frames: 470810624. Throughput: 0: 11749.7. Samples: 5194396. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:46,169][03423] Avg episode reward: [(0, '52.989')] -[2024-07-05 15:02:46,194][03976] Updated weights for policy 0, policy_version 59915 (0.0008) -[2024-07-05 15:02:47,901][03976] Updated weights for policy 0, policy_version 59925 (0.0007) -[2024-07-05 15:02:49,696][03976] Updated weights for policy 0, policy_version 59935 (0.0011) -[2024-07-05 15:02:51,168][03423] Fps is (10 sec: 47513.1, 60 sec: 47240.4, 300 sec: 45542.0). Total num frames: 471048192. Throughput: 0: 11717.3. Samples: 5229512. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:02:51,169][03423] Avg episode reward: [(0, '52.452')] -[2024-07-05 15:02:51,440][03976] Updated weights for policy 0, policy_version 59945 (0.0015) -[2024-07-05 15:02:53,155][03976] Updated weights for policy 0, policy_version 59955 (0.0008) -[2024-07-05 15:02:54,908][03976] Updated weights for policy 0, policy_version 59965 (0.0008) -[2024-07-05 15:02:56,167][03423] Fps is (10 sec: 47514.2, 60 sec: 47104.0, 300 sec: 45764.1). Total num frames: 471285760. Throughput: 0: 11674.5. Samples: 5300228. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:02:56,168][03423] Avg episode reward: [(0, '50.796')] -[2024-07-05 15:02:56,655][03976] Updated weights for policy 0, policy_version 59975 (0.0008) -[2024-07-05 15:02:58,364][03976] Updated weights for policy 0, policy_version 59985 (0.0008) -[2024-07-05 15:03:00,097][03976] Updated weights for policy 0, policy_version 59995 (0.0007) -[2024-07-05 15:03:01,168][03423] Fps is (10 sec: 47513.9, 60 sec: 47104.0, 300 sec: 45986.3). Total num frames: 471523328. Throughput: 0: 11678.3. Samples: 5371360. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:03:01,169][03423] Avg episode reward: [(0, '53.154')] -[2024-07-05 15:03:01,774][03976] Updated weights for policy 0, policy_version 60005 (0.0008) -[2024-07-05 15:03:03,634][03976] Updated weights for policy 0, policy_version 60015 (0.0008) -[2024-07-05 15:03:05,458][03976] Updated weights for policy 0, policy_version 60025 (0.0012) -[2024-07-05 15:03:06,168][03423] Fps is (10 sec: 46694.3, 60 sec: 46831.0, 300 sec: 46154.3). Total num frames: 471752704. Throughput: 0: 11679.2. Samples: 5406032. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:03:06,169][03423] Avg episode reward: [(0, '55.342')] -[2024-07-05 15:03:07,201][03976] Updated weights for policy 0, policy_version 60035 (0.0008) -[2024-07-05 15:03:08,948][03976] Updated weights for policy 0, policy_version 60045 (0.0008) -[2024-07-05 15:03:10,736][03976] Updated weights for policy 0, policy_version 60055 (0.0008) -[2024-07-05 15:03:11,168][03423] Fps is (10 sec: 45875.0, 60 sec: 46557.8, 300 sec: 46319.5). Total num frames: 471982080. Throughput: 0: 11663.7. Samples: 5474976. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:03:11,169][03423] Avg episode reward: [(0, '55.011')] -[2024-07-05 15:03:12,512][03976] Updated weights for policy 0, policy_version 60065 (0.0008) -[2024-07-05 15:03:14,351][03976] Updated weights for policy 0, policy_version 60075 (0.0008) -[2024-07-05 15:03:16,098][03976] Updated weights for policy 0, policy_version 60085 (0.0009) -[2024-07-05 15:03:16,168][03423] Fps is (10 sec: 45875.2, 60 sec: 46558.1, 300 sec: 46486.1). Total num frames: 472211456. Throughput: 0: 11647.8. Samples: 5543764. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:03:16,168][03423] Avg episode reward: [(0, '54.775')] -[2024-07-05 15:03:17,901][03976] Updated weights for policy 0, policy_version 60095 (0.0008) -[2024-07-05 15:03:19,623][03976] Updated weights for policy 0, policy_version 60105 (0.0008) -[2024-07-05 15:03:21,167][03423] Fps is (10 sec: 46695.0, 60 sec: 46694.4, 300 sec: 46680.5). Total num frames: 472449024. Throughput: 0: 11655.9. Samples: 5578956. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:03:21,169][03423] Avg episode reward: [(0, '54.990')] -[2024-07-05 15:03:21,337][03976] Updated weights for policy 0, policy_version 60115 (0.0008) -[2024-07-05 15:03:23,075][03976] Updated weights for policy 0, policy_version 60125 (0.0011) -[2024-07-05 15:03:24,877][03976] Updated weights for policy 0, policy_version 60135 (0.0010) -[2024-07-05 15:03:26,167][03423] Fps is (10 sec: 46694.7, 60 sec: 46557.9, 300 sec: 46874.9). Total num frames: 472678400. Throughput: 0: 11677.5. Samples: 5648884. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:03:26,168][03423] Avg episode reward: [(0, '53.826')] -[2024-07-05 15:03:26,664][03976] Updated weights for policy 0, policy_version 60145 (0.0011) -[2024-07-05 15:03:28,390][03976] Updated weights for policy 0, policy_version 60155 (0.0011) -[2024-07-05 15:03:30,174][03976] Updated weights for policy 0, policy_version 60165 (0.0008) -[2024-07-05 15:03:31,168][03423] Fps is (10 sec: 45874.6, 60 sec: 46557.8, 300 sec: 47041.5). Total num frames: 472907776. Throughput: 0: 11640.0. Samples: 5718196. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:03:31,169][03423] Avg episode reward: [(0, '55.282')] -[2024-07-05 15:03:31,893][03976] Updated weights for policy 0, policy_version 60175 (0.0010) -[2024-07-05 15:03:33,639][03976] Updated weights for policy 0, policy_version 60185 (0.0008) -[2024-07-05 15:03:35,350][03976] Updated weights for policy 0, policy_version 60195 (0.0008) -[2024-07-05 15:03:36,167][03423] Fps is (10 sec: 46694.4, 60 sec: 46694.5, 300 sec: 47124.8). Total num frames: 473145344. Throughput: 0: 11646.0. Samples: 5753580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:03:36,168][03423] Avg episode reward: [(0, '56.023')] -[2024-07-05 15:03:37,031][03976] Updated weights for policy 0, policy_version 60205 (0.0008) -[2024-07-05 15:03:38,737][03976] Updated weights for policy 0, policy_version 60215 (0.0008) -[2024-07-05 15:03:40,504][03976] Updated weights for policy 0, policy_version 60225 (0.0008) -[2024-07-05 15:03:41,167][03423] Fps is (10 sec: 47514.3, 60 sec: 46831.0, 300 sec: 47124.8). Total num frames: 473382912. Throughput: 0: 11680.5. Samples: 5825848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:03:41,168][03423] Avg episode reward: [(0, '53.047')] -[2024-07-05 15:03:42,160][03976] Updated weights for policy 0, policy_version 60235 (0.0007) -[2024-07-05 15:03:43,877][03976] Updated weights for policy 0, policy_version 60245 (0.0007) -[2024-07-05 15:03:45,578][03976] Updated weights for policy 0, policy_version 60255 (0.0008) -[2024-07-05 15:03:46,167][03423] Fps is (10 sec: 48332.7, 60 sec: 46967.6, 300 sec: 47152.6). Total num frames: 473628672. Throughput: 0: 11708.2. Samples: 5898228. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:03:46,169][03423] Avg episode reward: [(0, '53.796')] -[2024-07-05 15:03:47,261][03976] Updated weights for policy 0, policy_version 60265 (0.0010) -[2024-07-05 15:03:48,911][03976] Updated weights for policy 0, policy_version 60275 (0.0009) -[2024-07-05 15:03:50,609][03976] Updated weights for policy 0, policy_version 60285 (0.0010) -[2024-07-05 15:03:51,168][03423] Fps is (10 sec: 49151.5, 60 sec: 47104.0, 300 sec: 47180.4). Total num frames: 473874432. Throughput: 0: 11749.9. Samples: 5934776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:03:51,168][03423] Avg episode reward: [(0, '53.197')] -[2024-07-05 15:03:52,285][03976] Updated weights for policy 0, policy_version 60295 (0.0008) -[2024-07-05 15:03:54,003][03976] Updated weights for policy 0, policy_version 60305 (0.0008) -[2024-07-05 15:03:55,715][03976] Updated weights for policy 0, policy_version 60315 (0.0008) -[2024-07-05 15:03:56,168][03423] Fps is (10 sec: 48332.3, 60 sec: 47103.9, 300 sec: 47180.4). Total num frames: 474112000. Throughput: 0: 11823.3. Samples: 6007024. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:03:56,169][03423] Avg episode reward: [(0, '55.208')] -[2024-07-05 15:03:57,378][03976] Updated weights for policy 0, policy_version 60325 (0.0008) -[2024-07-05 15:03:59,098][03976] Updated weights for policy 0, policy_version 60335 (0.0010) -[2024-07-05 15:04:00,816][03976] Updated weights for policy 0, policy_version 60345 (0.0008) -[2024-07-05 15:04:01,168][03423] Fps is (10 sec: 48332.3, 60 sec: 47240.4, 300 sec: 47208.1). Total num frames: 474357760. Throughput: 0: 11906.2. Samples: 6079544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:01,168][03423] Avg episode reward: [(0, '53.853')] -[2024-07-05 15:04:02,476][03976] Updated weights for policy 0, policy_version 60355 (0.0007) -[2024-07-05 15:04:04,172][03976] Updated weights for policy 0, policy_version 60365 (0.0008) -[2024-07-05 15:04:05,892][03976] Updated weights for policy 0, policy_version 60375 (0.0009) -[2024-07-05 15:04:06,167][03423] Fps is (10 sec: 48333.5, 60 sec: 47377.1, 300 sec: 47208.1). Total num frames: 474595328. Throughput: 0: 11930.5. Samples: 6115828. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:06,168][03423] Avg episode reward: [(0, '54.233')] -[2024-07-05 15:04:06,216][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000060377_474603520.pth... -[2024-07-05 15:04:06,286][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000058992_463257600.pth -[2024-07-05 15:04:07,577][03976] Updated weights for policy 0, policy_version 60385 (0.0008) -[2024-07-05 15:04:09,304][03976] Updated weights for policy 0, policy_version 60395 (0.0010) -[2024-07-05 15:04:10,970][03976] Updated weights for policy 0, policy_version 60405 (0.0008) -[2024-07-05 15:04:11,168][03423] Fps is (10 sec: 48332.7, 60 sec: 47650.0, 300 sec: 47235.9). Total num frames: 474841088. Throughput: 0: 11986.9. Samples: 6188296. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:11,169][03423] Avg episode reward: [(0, '56.566')] -[2024-07-05 15:04:12,692][03976] Updated weights for policy 0, policy_version 60415 (0.0010) -[2024-07-05 15:04:14,376][03976] Updated weights for policy 0, policy_version 60425 (0.0007) -[2024-07-05 15:04:16,049][03976] Updated weights for policy 0, policy_version 60435 (0.0008) -[2024-07-05 15:04:16,168][03423] Fps is (10 sec: 48332.6, 60 sec: 47786.7, 300 sec: 47235.9). Total num frames: 475078656. Throughput: 0: 12053.1. Samples: 6260584. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:16,168][03423] Avg episode reward: [(0, '54.650')] -[2024-07-05 15:04:17,772][03976] Updated weights for policy 0, policy_version 60445 (0.0008) -[2024-07-05 15:04:19,447][03976] Updated weights for policy 0, policy_version 60455 (0.0010) -[2024-07-05 15:04:21,168][03423] Fps is (10 sec: 48333.7, 60 sec: 47923.2, 300 sec: 47263.7). Total num frames: 475324416. Throughput: 0: 12076.7. Samples: 6297032. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:21,169][03423] Avg episode reward: [(0, '50.422')] -[2024-07-05 15:04:21,169][03976] Updated weights for policy 0, policy_version 60465 (0.0008) -[2024-07-05 15:04:22,828][03976] Updated weights for policy 0, policy_version 60475 (0.0008) -[2024-07-05 15:04:24,514][03976] Updated weights for policy 0, policy_version 60485 (0.0011) -[2024-07-05 15:04:26,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48059.7, 300 sec: 47263.7). Total num frames: 475561984. Throughput: 0: 12089.9. Samples: 6369892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:26,168][03423] Avg episode reward: [(0, '52.314')] -[2024-07-05 15:04:26,185][03976] Updated weights for policy 0, policy_version 60495 (0.0008) -[2024-07-05 15:04:27,888][03976] Updated weights for policy 0, policy_version 60505 (0.0008) -[2024-07-05 15:04:29,571][03976] Updated weights for policy 0, policy_version 60515 (0.0007) -[2024-07-05 15:04:31,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.9, 300 sec: 47291.4). Total num frames: 475807744. Throughput: 0: 12089.0. Samples: 6442232. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:31,168][03423] Avg episode reward: [(0, '54.383')] -[2024-07-05 15:04:31,284][03976] Updated weights for policy 0, policy_version 60525 (0.0007) -[2024-07-05 15:04:32,994][03976] Updated weights for policy 0, policy_version 60535 (0.0007) -[2024-07-05 15:04:34,705][03976] Updated weights for policy 0, policy_version 60545 (0.0010) -[2024-07-05 15:04:36,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48332.8, 300 sec: 47319.2). Total num frames: 476045312. Throughput: 0: 12080.6. Samples: 6478404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:36,169][03423] Avg episode reward: [(0, '53.288')] -[2024-07-05 15:04:36,411][03976] Updated weights for policy 0, policy_version 60555 (0.0008) -[2024-07-05 15:04:38,133][03976] Updated weights for policy 0, policy_version 60565 (0.0013) -[2024-07-05 15:04:39,801][03976] Updated weights for policy 0, policy_version 60575 (0.0007) -[2024-07-05 15:04:41,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48469.3, 300 sec: 47347.0). Total num frames: 476291072. Throughput: 0: 12078.3. Samples: 6550548. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:41,169][03423] Avg episode reward: [(0, '55.977')] -[2024-07-05 15:04:41,489][03976] Updated weights for policy 0, policy_version 60585 (0.0008) -[2024-07-05 15:04:43,198][03976] Updated weights for policy 0, policy_version 60595 (0.0008) -[2024-07-05 15:04:44,880][03976] Updated weights for policy 0, policy_version 60605 (0.0007) -[2024-07-05 15:04:46,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48332.7, 300 sec: 47347.0). Total num frames: 476528640. Throughput: 0: 12077.6. Samples: 6623036. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:46,169][03423] Avg episode reward: [(0, '52.223')] -[2024-07-05 15:04:46,588][03976] Updated weights for policy 0, policy_version 60615 (0.0008) -[2024-07-05 15:04:48,293][03976] Updated weights for policy 0, policy_version 60625 (0.0008) -[2024-07-05 15:04:49,976][03976] Updated weights for policy 0, policy_version 60635 (0.0008) -[2024-07-05 15:04:51,168][03423] Fps is (10 sec: 48331.6, 60 sec: 48332.6, 300 sec: 47402.5). Total num frames: 476774400. Throughput: 0: 12072.1. Samples: 6659076. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:51,169][03423] Avg episode reward: [(0, '55.454')] -[2024-07-05 15:04:51,682][03976] Updated weights for policy 0, policy_version 60645 (0.0008) -[2024-07-05 15:04:53,352][03976] Updated weights for policy 0, policy_version 60655 (0.0007) -[2024-07-05 15:04:55,042][03976] Updated weights for policy 0, policy_version 60665 (0.0007) -[2024-07-05 15:04:56,168][03423] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 47430.3). Total num frames: 477011968. Throughput: 0: 12080.8. Samples: 6731932. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:04:56,169][03423] Avg episode reward: [(0, '52.738')] -[2024-07-05 15:04:56,728][03976] Updated weights for policy 0, policy_version 60675 (0.0008) -[2024-07-05 15:04:58,486][03976] Updated weights for policy 0, policy_version 60685 (0.0007) -[2024-07-05 15:05:00,106][03976] Updated weights for policy 0, policy_version 60695 (0.0008) -[2024-07-05 15:05:01,168][03423] Fps is (10 sec: 48333.7, 60 sec: 48332.9, 300 sec: 47485.8). Total num frames: 477257728. Throughput: 0: 12089.8. Samples: 6804628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:05:01,169][03423] Avg episode reward: [(0, '54.927')] -[2024-07-05 15:05:01,781][03976] Updated weights for policy 0, policy_version 60705 (0.0007) -[2024-07-05 15:05:03,501][03976] Updated weights for policy 0, policy_version 60715 (0.0007) -[2024-07-05 15:05:05,183][03976] Updated weights for policy 0, policy_version 60725 (0.0008) -[2024-07-05 15:05:06,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48332.7, 300 sec: 47485.8). Total num frames: 477495296. Throughput: 0: 12086.4. Samples: 6840920. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:05:06,169][03423] Avg episode reward: [(0, '54.496')] -[2024-07-05 15:05:06,882][03976] Updated weights for policy 0, policy_version 60735 (0.0008) -[2024-07-05 15:05:08,638][03976] Updated weights for policy 0, policy_version 60745 (0.0008) -[2024-07-05 15:05:10,320][03976] Updated weights for policy 0, policy_version 60755 (0.0008) -[2024-07-05 15:05:11,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48332.8, 300 sec: 47513.6). Total num frames: 477741056. Throughput: 0: 12067.1. Samples: 6912912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:05:11,169][03423] Avg episode reward: [(0, '54.518')] -[2024-07-05 15:05:11,995][03976] Updated weights for policy 0, policy_version 60765 (0.0007) -[2024-07-05 15:05:13,733][03976] Updated weights for policy 0, policy_version 60775 (0.0008) -[2024-07-05 15:05:15,464][03976] Updated weights for policy 0, policy_version 60785 (0.0008) -[2024-07-05 15:05:16,168][03423] Fps is (10 sec: 48333.1, 60 sec: 48332.8, 300 sec: 47541.4). Total num frames: 477978624. Throughput: 0: 12072.3. Samples: 6985488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:05:16,169][03423] Avg episode reward: [(0, '55.066')] -[2024-07-05 15:05:17,137][03976] Updated weights for policy 0, policy_version 60795 (0.0008) -[2024-07-05 15:05:18,835][03976] Updated weights for policy 0, policy_version 60805 (0.0010) -[2024-07-05 15:05:20,494][03976] Updated weights for policy 0, policy_version 60815 (0.0008) -[2024-07-05 15:05:21,167][03423] Fps is (10 sec: 47514.4, 60 sec: 48196.3, 300 sec: 47541.4). Total num frames: 478216192. Throughput: 0: 12073.8. Samples: 7021724. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:05:21,169][03423] Avg episode reward: [(0, '52.536')] -[2024-07-05 15:05:22,184][03976] Updated weights for policy 0, policy_version 60825 (0.0009) -[2024-07-05 15:05:23,872][03976] Updated weights for policy 0, policy_version 60835 (0.0010) -[2024-07-05 15:05:25,561][03976] Updated weights for policy 0, policy_version 60845 (0.0008) -[2024-07-05 15:05:26,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 47569.1). Total num frames: 478461952. Throughput: 0: 12078.5. Samples: 7094080. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:26,168][03423] Avg episode reward: [(0, '54.110')] -[2024-07-05 15:05:27,265][03976] Updated weights for policy 0, policy_version 60855 (0.0008) -[2024-07-05 15:05:28,957][03976] Updated weights for policy 0, policy_version 60865 (0.0008) -[2024-07-05 15:05:30,662][03976] Updated weights for policy 0, policy_version 60875 (0.0008) -[2024-07-05 15:05:31,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 47596.9). Total num frames: 478699520. Throughput: 0: 12073.7. Samples: 7166348. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:31,168][03423] Avg episode reward: [(0, '56.118')] -[2024-07-05 15:05:32,368][03976] Updated weights for policy 0, policy_version 60885 (0.0008) -[2024-07-05 15:05:34,076][03976] Updated weights for policy 0, policy_version 60895 (0.0008) -[2024-07-05 15:05:35,729][03976] Updated weights for policy 0, policy_version 60905 (0.0009) -[2024-07-05 15:05:36,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48332.8, 300 sec: 47624.7). Total num frames: 478945280. Throughput: 0: 12088.2. Samples: 7203040. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:36,177][03423] Avg episode reward: [(0, '56.054')] -[2024-07-05 15:05:37,440][03976] Updated weights for policy 0, policy_version 60915 (0.0008) -[2024-07-05 15:05:39,135][03976] Updated weights for policy 0, policy_version 60925 (0.0009) -[2024-07-05 15:05:40,849][03976] Updated weights for policy 0, policy_version 60935 (0.0008) -[2024-07-05 15:05:41,168][03423] Fps is (10 sec: 48332.2, 60 sec: 48196.2, 300 sec: 47624.7). Total num frames: 479182848. Throughput: 0: 12070.5. Samples: 7275104. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:41,169][03423] Avg episode reward: [(0, '53.439')] -[2024-07-05 15:05:42,524][03976] Updated weights for policy 0, policy_version 60945 (0.0008) -[2024-07-05 15:05:44,248][03976] Updated weights for policy 0, policy_version 60955 (0.0008) -[2024-07-05 15:05:45,937][03976] Updated weights for policy 0, policy_version 60965 (0.0011) -[2024-07-05 15:05:46,167][03423] Fps is (10 sec: 48333.1, 60 sec: 48332.9, 300 sec: 47652.5). Total num frames: 479428608. Throughput: 0: 12070.2. Samples: 7347784. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:46,168][03423] Avg episode reward: [(0, '55.201')] -[2024-07-05 15:05:47,660][03976] Updated weights for policy 0, policy_version 60975 (0.0007) -[2024-07-05 15:05:49,316][03976] Updated weights for policy 0, policy_version 60985 (0.0007) -[2024-07-05 15:05:50,977][03976] Updated weights for policy 0, policy_version 60995 (0.0008) -[2024-07-05 15:05:51,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.5, 300 sec: 47680.2). Total num frames: 479666176. Throughput: 0: 12060.4. Samples: 7383636. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:51,169][03423] Avg episode reward: [(0, '52.884')] -[2024-07-05 15:05:52,687][03976] Updated weights for policy 0, policy_version 61005 (0.0010) -[2024-07-05 15:05:54,399][03976] Updated weights for policy 0, policy_version 61015 (0.0007) -[2024-07-05 15:05:56,077][03976] Updated weights for policy 0, policy_version 61025 (0.0007) -[2024-07-05 15:05:56,168][03423] Fps is (10 sec: 48331.5, 60 sec: 48332.6, 300 sec: 47707.9). Total num frames: 479911936. Throughput: 0: 12074.9. Samples: 7456284. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:05:56,169][03423] Avg episode reward: [(0, '51.532')] -[2024-07-05 15:05:57,794][03976] Updated weights for policy 0, policy_version 61035 (0.0008) -[2024-07-05 15:05:59,498][03976] Updated weights for policy 0, policy_version 61045 (0.0007) -[2024-07-05 15:06:01,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 47735.8). Total num frames: 480149504. Throughput: 0: 12073.7. Samples: 7528804. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:01,168][03423] Avg episode reward: [(0, '53.925')] -[2024-07-05 15:06:01,186][03976] Updated weights for policy 0, policy_version 61055 (0.0008) -[2024-07-05 15:06:02,884][03976] Updated weights for policy 0, policy_version 61065 (0.0008) -[2024-07-05 15:06:04,581][03976] Updated weights for policy 0, policy_version 61075 (0.0007) -[2024-07-05 15:06:06,168][03423] Fps is (10 sec: 48333.2, 60 sec: 48332.7, 300 sec: 47763.5). Total num frames: 480395264. Throughput: 0: 12077.3. Samples: 7565204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:06,169][03423] Avg episode reward: [(0, '54.948')] -[2024-07-05 15:06:06,173][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000061084_480395264.pth... -[2024-07-05 15:06:06,246][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000059686_468942848.pth -[2024-07-05 15:06:06,320][03976] Updated weights for policy 0, policy_version 61085 (0.0007) -[2024-07-05 15:06:07,970][03976] Updated weights for policy 0, policy_version 61095 (0.0008) -[2024-07-05 15:06:09,709][03976] Updated weights for policy 0, policy_version 61105 (0.0007) -[2024-07-05 15:06:11,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.3, 300 sec: 47791.3). Total num frames: 480632832. Throughput: 0: 12070.3. Samples: 7637244. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:11,169][03423] Avg episode reward: [(0, '54.726')] -[2024-07-05 15:06:11,379][03976] Updated weights for policy 0, policy_version 61115 (0.0010) -[2024-07-05 15:06:13,112][03976] Updated weights for policy 0, policy_version 61125 (0.0010) -[2024-07-05 15:06:14,795][03976] Updated weights for policy 0, policy_version 61135 (0.0008) -[2024-07-05 15:06:16,168][03423] Fps is (10 sec: 48333.6, 60 sec: 48332.8, 300 sec: 47846.8). Total num frames: 480878592. Throughput: 0: 12063.4. Samples: 7709204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:16,169][03423] Avg episode reward: [(0, '53.004')] -[2024-07-05 15:06:16,513][03976] Updated weights for policy 0, policy_version 61145 (0.0008) -[2024-07-05 15:06:18,197][03976] Updated weights for policy 0, policy_version 61155 (0.0008) -[2024-07-05 15:06:19,907][03976] Updated weights for policy 0, policy_version 61165 (0.0008) -[2024-07-05 15:06:21,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48332.6, 300 sec: 47846.8). Total num frames: 481116160. Throughput: 0: 12054.1. Samples: 7745476. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:21,169][03423] Avg episode reward: [(0, '55.987')] -[2024-07-05 15:06:21,638][03976] Updated weights for policy 0, policy_version 61175 (0.0008) -[2024-07-05 15:06:23,337][03976] Updated weights for policy 0, policy_version 61185 (0.0007) -[2024-07-05 15:06:25,035][03976] Updated weights for policy 0, policy_version 61195 (0.0008) -[2024-07-05 15:06:26,168][03423] Fps is (10 sec: 47513.4, 60 sec: 48196.2, 300 sec: 47819.0). Total num frames: 481353728. Throughput: 0: 12054.9. Samples: 7817576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:26,169][03423] Avg episode reward: [(0, '55.511')] -[2024-07-05 15:06:26,745][03976] Updated weights for policy 0, policy_version 61205 (0.0009) -[2024-07-05 15:06:28,468][03976] Updated weights for policy 0, policy_version 61215 (0.0008) -[2024-07-05 15:06:30,129][03976] Updated weights for policy 0, policy_version 61225 (0.0008) -[2024-07-05 15:06:31,167][03423] Fps is (10 sec: 48334.1, 60 sec: 48332.8, 300 sec: 47846.9). Total num frames: 481599488. Throughput: 0: 12044.5. Samples: 7889788. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:31,168][03423] Avg episode reward: [(0, '54.394')] -[2024-07-05 15:06:31,841][03976] Updated weights for policy 0, policy_version 61235 (0.0007) -[2024-07-05 15:06:33,541][03976] Updated weights for policy 0, policy_version 61245 (0.0009) -[2024-07-05 15:06:35,213][03976] Updated weights for policy 0, policy_version 61255 (0.0008) -[2024-07-05 15:06:36,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 47819.0). Total num frames: 481837056. Throughput: 0: 12052.7. Samples: 7926008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:06:36,169][03423] Avg episode reward: [(0, '52.376')] -[2024-07-05 15:06:36,900][03976] Updated weights for policy 0, policy_version 61265 (0.0008) -[2024-07-05 15:06:38,637][03976] Updated weights for policy 0, policy_version 61275 (0.0008) -[2024-07-05 15:06:40,329][03976] Updated weights for policy 0, policy_version 61285 (0.0007) -[2024-07-05 15:06:41,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48332.8, 300 sec: 47819.1). Total num frames: 482082816. Throughput: 0: 12045.2. Samples: 7998316. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:06:41,169][03423] Avg episode reward: [(0, '55.718')] -[2024-07-05 15:06:42,015][03976] Updated weights for policy 0, policy_version 61295 (0.0008) -[2024-07-05 15:06:43,719][03976] Updated weights for policy 0, policy_version 61305 (0.0008) -[2024-07-05 15:06:45,431][03976] Updated weights for policy 0, policy_version 61315 (0.0007) -[2024-07-05 15:06:46,167][03423] Fps is (10 sec: 48333.7, 60 sec: 48196.3, 300 sec: 47819.1). Total num frames: 482320384. Throughput: 0: 12040.3. Samples: 8070616. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:06:46,168][03423] Avg episode reward: [(0, '56.383')] -[2024-07-05 15:06:47,145][03976] Updated weights for policy 0, policy_version 61325 (0.0008) -[2024-07-05 15:06:48,836][03976] Updated weights for policy 0, policy_version 61335 (0.0008) -[2024-07-05 15:06:50,516][03976] Updated weights for policy 0, policy_version 61345 (0.0008) -[2024-07-05 15:06:51,168][03423] Fps is (10 sec: 47513.4, 60 sec: 48196.2, 300 sec: 47791.3). Total num frames: 482557952. Throughput: 0: 12035.3. Samples: 8106792. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:06:51,169][03423] Avg episode reward: [(0, '55.262')] -[2024-07-05 15:06:52,212][03976] Updated weights for policy 0, policy_version 61355 (0.0008) -[2024-07-05 15:06:53,925][03976] Updated weights for policy 0, policy_version 61365 (0.0010) -[2024-07-05 15:06:55,604][03976] Updated weights for policy 0, policy_version 61375 (0.0008) -[2024-07-05 15:06:56,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.5, 300 sec: 47819.1). Total num frames: 482803712. Throughput: 0: 12044.4. Samples: 8179240. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:06:56,169][03423] Avg episode reward: [(0, '53.580')] -[2024-07-05 15:06:57,300][03976] Updated weights for policy 0, policy_version 61385 (0.0007) -[2024-07-05 15:06:59,003][03976] Updated weights for policy 0, policy_version 61395 (0.0008) -[2024-07-05 15:07:00,686][03976] Updated weights for policy 0, policy_version 61405 (0.0010) -[2024-07-05 15:07:01,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 47791.3). Total num frames: 483041280. Throughput: 0: 12055.1. Samples: 8251684. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:07:01,169][03423] Avg episode reward: [(0, '54.353')] -[2024-07-05 15:07:02,411][03976] Updated weights for policy 0, policy_version 61415 (0.0010) -[2024-07-05 15:07:04,098][03976] Updated weights for policy 0, policy_version 61425 (0.0008) -[2024-07-05 15:07:05,785][03976] Updated weights for policy 0, policy_version 61435 (0.0010) -[2024-07-05 15:07:06,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48196.4, 300 sec: 47791.3). Total num frames: 483287040. Throughput: 0: 12050.8. Samples: 8287760. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:07:06,168][03423] Avg episode reward: [(0, '55.186')] -[2024-07-05 15:07:07,497][03976] Updated weights for policy 0, policy_version 61445 (0.0007) -[2024-07-05 15:07:09,196][03976] Updated weights for policy 0, policy_version 61455 (0.0011) -[2024-07-05 15:07:10,899][03976] Updated weights for policy 0, policy_version 61465 (0.0008) -[2024-07-05 15:07:11,168][03423] Fps is (10 sec: 48332.0, 60 sec: 48196.1, 300 sec: 47819.1). Total num frames: 483524608. Throughput: 0: 12059.4. Samples: 8360252. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:07:11,169][03423] Avg episode reward: [(0, '54.922')] -[2024-07-05 15:07:12,598][03976] Updated weights for policy 0, policy_version 61475 (0.0008) -[2024-07-05 15:07:14,334][03976] Updated weights for policy 0, policy_version 61485 (0.0008) -[2024-07-05 15:07:16,019][03976] Updated weights for policy 0, policy_version 61495 (0.0007) -[2024-07-05 15:07:16,167][03423] Fps is (10 sec: 47513.8, 60 sec: 48059.8, 300 sec: 47846.8). Total num frames: 483762176. Throughput: 0: 12062.5. Samples: 8432600. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:16,168][03423] Avg episode reward: [(0, '56.330')] -[2024-07-05 15:07:17,657][03976] Updated weights for policy 0, policy_version 61505 (0.0008) -[2024-07-05 15:07:19,373][03976] Updated weights for policy 0, policy_version 61515 (0.0010) -[2024-07-05 15:07:21,081][03976] Updated weights for policy 0, policy_version 61525 (0.0008) -[2024-07-05 15:07:21,167][03423] Fps is (10 sec: 48334.6, 60 sec: 48196.5, 300 sec: 47874.6). Total num frames: 484007936. Throughput: 0: 12060.0. Samples: 8468704. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:21,168][03423] Avg episode reward: [(0, '54.665')] -[2024-07-05 15:07:22,788][03976] Updated weights for policy 0, policy_version 61535 (0.0009) -[2024-07-05 15:07:24,457][03976] Updated weights for policy 0, policy_version 61545 (0.0008) -[2024-07-05 15:07:26,163][03976] Updated weights for policy 0, policy_version 61555 (0.0008) -[2024-07-05 15:07:26,168][03423] Fps is (10 sec: 49151.0, 60 sec: 48332.7, 300 sec: 47930.1). Total num frames: 484253696. Throughput: 0: 12063.8. Samples: 8541188. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:26,169][03423] Avg episode reward: [(0, '55.677')] -[2024-07-05 15:07:27,869][03976] Updated weights for policy 0, policy_version 61565 (0.0007) -[2024-07-05 15:07:29,540][03976] Updated weights for policy 0, policy_version 61575 (0.0007) -[2024-07-05 15:07:31,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.2, 300 sec: 47957.9). Total num frames: 484491264. Throughput: 0: 12071.6. Samples: 8613840. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:31,169][03423] Avg episode reward: [(0, '55.232')] -[2024-07-05 15:07:31,232][03976] Updated weights for policy 0, policy_version 61585 (0.0008) -[2024-07-05 15:07:32,953][03976] Updated weights for policy 0, policy_version 61595 (0.0008) -[2024-07-05 15:07:34,650][03976] Updated weights for policy 0, policy_version 61605 (0.0008) -[2024-07-05 15:07:36,167][03423] Fps is (10 sec: 47514.6, 60 sec: 48196.4, 300 sec: 47985.7). Total num frames: 484728832. Throughput: 0: 12065.4. Samples: 8649732. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:36,169][03423] Avg episode reward: [(0, '54.226')] -[2024-07-05 15:07:36,343][03976] Updated weights for policy 0, policy_version 61615 (0.0008) -[2024-07-05 15:07:38,054][03976] Updated weights for policy 0, policy_version 61625 (0.0007) -[2024-07-05 15:07:39,765][03976] Updated weights for policy 0, policy_version 61635 (0.0007) -[2024-07-05 15:07:41,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48013.5). Total num frames: 484974592. Throughput: 0: 12069.9. Samples: 8722384. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:41,168][03423] Avg episode reward: [(0, '52.706')] -[2024-07-05 15:07:41,474][03976] Updated weights for policy 0, policy_version 61645 (0.0008) -[2024-07-05 15:07:43,141][03976] Updated weights for policy 0, policy_version 61655 (0.0008) -[2024-07-05 15:07:44,873][03976] Updated weights for policy 0, policy_version 61665 (0.0007) -[2024-07-05 15:07:46,168][03423] Fps is (10 sec: 48331.7, 60 sec: 48196.1, 300 sec: 48013.4). Total num frames: 485212160. Throughput: 0: 12061.1. Samples: 8794436. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:46,169][03423] Avg episode reward: [(0, '57.200')] -[2024-07-05 15:07:46,565][03976] Updated weights for policy 0, policy_version 61675 (0.0009) -[2024-07-05 15:07:48,284][03976] Updated weights for policy 0, policy_version 61685 (0.0008) -[2024-07-05 15:07:49,972][03976] Updated weights for policy 0, policy_version 61695 (0.0008) -[2024-07-05 15:07:51,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 48041.2). Total num frames: 485457920. Throughput: 0: 12061.0. Samples: 8830504. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:07:51,169][03423] Avg episode reward: [(0, '53.850')] -[2024-07-05 15:07:51,676][03976] Updated weights for policy 0, policy_version 61705 (0.0007) -[2024-07-05 15:07:53,344][03976] Updated weights for policy 0, policy_version 61715 (0.0008) -[2024-07-05 15:07:55,038][03976] Updated weights for policy 0, policy_version 61725 (0.0007) -[2024-07-05 15:07:56,168][03423] Fps is (10 sec: 48333.7, 60 sec: 48196.2, 300 sec: 48041.2). Total num frames: 485695488. Throughput: 0: 12058.1. Samples: 8902864. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:07:56,169][03423] Avg episode reward: [(0, '56.362')] -[2024-07-05 15:07:56,717][03976] Updated weights for policy 0, policy_version 61735 (0.0008) -[2024-07-05 15:07:58,444][03976] Updated weights for policy 0, policy_version 61745 (0.0011) -[2024-07-05 15:08:00,161][03976] Updated weights for policy 0, policy_version 61755 (0.0007) -[2024-07-05 15:08:01,167][03423] Fps is (10 sec: 47513.7, 60 sec: 48196.4, 300 sec: 48069.0). Total num frames: 485933056. Throughput: 0: 12050.9. Samples: 8974892. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:01,169][03423] Avg episode reward: [(0, '55.171')] -[2024-07-05 15:08:01,867][03976] Updated weights for policy 0, policy_version 61765 (0.0007) -[2024-07-05 15:08:03,549][03976] Updated weights for policy 0, policy_version 61775 (0.0009) -[2024-07-05 15:08:05,279][03976] Updated weights for policy 0, policy_version 61785 (0.0008) -[2024-07-05 15:08:06,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 48124.5). Total num frames: 486178816. Throughput: 0: 12056.0. Samples: 9011224. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:06,169][03423] Avg episode reward: [(0, '55.938')] -[2024-07-05 15:08:06,174][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000061790_486178816.pth... -[2024-07-05 15:08:06,244][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000060377_474603520.pth -[2024-07-05 15:08:06,960][03976] Updated weights for policy 0, policy_version 61795 (0.0008) -[2024-07-05 15:08:08,692][03976] Updated weights for policy 0, policy_version 61805 (0.0014) -[2024-07-05 15:08:10,411][03976] Updated weights for policy 0, policy_version 61815 (0.0008) -[2024-07-05 15:08:11,167][03423] Fps is (10 sec: 48333.1, 60 sec: 48196.6, 300 sec: 48152.3). Total num frames: 486416384. Throughput: 0: 12049.2. Samples: 9083400. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:11,168][03423] Avg episode reward: [(0, '52.826')] -[2024-07-05 15:08:12,087][03976] Updated weights for policy 0, policy_version 61825 (0.0008) -[2024-07-05 15:08:13,817][03976] Updated weights for policy 0, policy_version 61835 (0.0008) -[2024-07-05 15:08:15,522][03976] Updated weights for policy 0, policy_version 61845 (0.0011) -[2024-07-05 15:08:16,168][03423] Fps is (10 sec: 47514.0, 60 sec: 48196.2, 300 sec: 48152.3). Total num frames: 486653952. Throughput: 0: 12025.1. Samples: 9154968. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:16,169][03423] Avg episode reward: [(0, '49.664')] -[2024-07-05 15:08:17,182][03976] Updated weights for policy 0, policy_version 61855 (0.0008) -[2024-07-05 15:08:18,881][03976] Updated weights for policy 0, policy_version 61865 (0.0010) -[2024-07-05 15:08:20,560][03976] Updated weights for policy 0, policy_version 61875 (0.0009) -[2024-07-05 15:08:21,168][03423] Fps is (10 sec: 48332.2, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 486899712. Throughput: 0: 12041.9. Samples: 9191620. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:21,169][03423] Avg episode reward: [(0, '52.102')] -[2024-07-05 15:08:22,277][03976] Updated weights for policy 0, policy_version 61885 (0.0007) -[2024-07-05 15:08:23,954][03976] Updated weights for policy 0, policy_version 61895 (0.0008) -[2024-07-05 15:08:25,662][03976] Updated weights for policy 0, policy_version 61905 (0.0009) -[2024-07-05 15:08:26,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48059.8, 300 sec: 48235.6). Total num frames: 487137280. Throughput: 0: 12032.1. Samples: 9263832. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:26,169][03423] Avg episode reward: [(0, '50.935')] -[2024-07-05 15:08:27,378][03976] Updated weights for policy 0, policy_version 61915 (0.0007) -[2024-07-05 15:08:29,117][03976] Updated weights for policy 0, policy_version 61925 (0.0008) -[2024-07-05 15:08:30,783][03976] Updated weights for policy 0, policy_version 61935 (0.0010) -[2024-07-05 15:08:31,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 487383040. Throughput: 0: 12040.3. Samples: 9336248. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:31,168][03423] Avg episode reward: [(0, '53.853')] -[2024-07-05 15:08:32,475][03976] Updated weights for policy 0, policy_version 61945 (0.0008) -[2024-07-05 15:08:34,171][03976] Updated weights for policy 0, policy_version 61955 (0.0010) -[2024-07-05 15:08:35,853][03976] Updated weights for policy 0, policy_version 61965 (0.0008) -[2024-07-05 15:08:36,167][03423] Fps is (10 sec: 48333.6, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 487620608. Throughput: 0: 12041.5. Samples: 9372372. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:36,168][03423] Avg episode reward: [(0, '55.972')] -[2024-07-05 15:08:37,574][03976] Updated weights for policy 0, policy_version 61975 (0.0008) -[2024-07-05 15:08:39,246][03976] Updated weights for policy 0, policy_version 61985 (0.0008) -[2024-07-05 15:08:40,962][03976] Updated weights for policy 0, policy_version 61995 (0.0008) -[2024-07-05 15:08:41,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48263.4). Total num frames: 487866368. Throughput: 0: 12045.3. Samples: 9444900. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:41,168][03423] Avg episode reward: [(0, '56.147')] -[2024-07-05 15:08:42,653][03976] Updated weights for policy 0, policy_version 62005 (0.0008) -[2024-07-05 15:08:44,330][03976] Updated weights for policy 0, policy_version 62015 (0.0007) -[2024-07-05 15:08:46,029][03976] Updated weights for policy 0, policy_version 62025 (0.0008) -[2024-07-05 15:08:46,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.5, 300 sec: 48235.6). Total num frames: 488103936. Throughput: 0: 12056.1. Samples: 9517416. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:46,168][03423] Avg episode reward: [(0, '54.903')] -[2024-07-05 15:08:47,762][03976] Updated weights for policy 0, policy_version 62035 (0.0010) -[2024-07-05 15:08:49,456][03976] Updated weights for policy 0, policy_version 62045 (0.0007) -[2024-07-05 15:08:51,112][03976] Updated weights for policy 0, policy_version 62055 (0.0010) -[2024-07-05 15:08:51,167][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.2, 300 sec: 48263.4). Total num frames: 488349696. Throughput: 0: 12059.1. Samples: 9553880. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:51,168][03423] Avg episode reward: [(0, '54.805')] -[2024-07-05 15:08:52,814][03976] Updated weights for policy 0, policy_version 62065 (0.0008) -[2024-07-05 15:08:54,518][03976] Updated weights for policy 0, policy_version 62075 (0.0011) -[2024-07-05 15:08:56,167][03423] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 488587264. Throughput: 0: 12058.7. Samples: 9626044. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:08:56,168][03423] Avg episode reward: [(0, '55.455')] -[2024-07-05 15:08:56,218][03976] Updated weights for policy 0, policy_version 62085 (0.0008) -[2024-07-05 15:08:57,918][03976] Updated weights for policy 0, policy_version 62095 (0.0008) -[2024-07-05 15:08:59,604][03976] Updated weights for policy 0, policy_version 62105 (0.0011) -[2024-07-05 15:09:01,167][03423] Fps is (10 sec: 48333.4, 60 sec: 48332.9, 300 sec: 48263.4). Total num frames: 488833024. Throughput: 0: 12083.2. Samples: 9698712. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:09:01,168][03423] Avg episode reward: [(0, '55.733')] -[2024-07-05 15:09:01,282][03976] Updated weights for policy 0, policy_version 62115 (0.0008) -[2024-07-05 15:09:02,960][03976] Updated weights for policy 0, policy_version 62125 (0.0007) -[2024-07-05 15:09:04,649][03976] Updated weights for policy 0, policy_version 62135 (0.0008) -[2024-07-05 15:09:06,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.4, 300 sec: 48235.6). Total num frames: 489070592. Throughput: 0: 12067.2. Samples: 9734644. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:09:06,168][03423] Avg episode reward: [(0, '54.845')] -[2024-07-05 15:09:06,369][03976] Updated weights for policy 0, policy_version 62145 (0.0008) -[2024-07-05 15:09:08,036][03976] Updated weights for policy 0, policy_version 62155 (0.0007) -[2024-07-05 15:09:09,772][03976] Updated weights for policy 0, policy_version 62165 (0.0008) -[2024-07-05 15:09:11,168][03423] Fps is (10 sec: 48331.9, 60 sec: 48332.7, 300 sec: 48263.4). Total num frames: 489316352. Throughput: 0: 12077.3. Samples: 9807312. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:11,169][03423] Avg episode reward: [(0, '54.689')] -[2024-07-05 15:09:11,471][03976] Updated weights for policy 0, policy_version 62175 (0.0007) -[2024-07-05 15:09:13,191][03976] Updated weights for policy 0, policy_version 62185 (0.0007) -[2024-07-05 15:09:14,868][03976] Updated weights for policy 0, policy_version 62195 (0.0007) -[2024-07-05 15:09:16,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 489553920. Throughput: 0: 12076.1. Samples: 9879672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:16,169][03423] Avg episode reward: [(0, '54.605')] -[2024-07-05 15:09:16,624][03976] Updated weights for policy 0, policy_version 62205 (0.0008) -[2024-07-05 15:09:18,277][03976] Updated weights for policy 0, policy_version 62215 (0.0010) -[2024-07-05 15:09:20,040][03976] Updated weights for policy 0, policy_version 62225 (0.0008) -[2024-07-05 15:09:21,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48263.4). Total num frames: 489799680. Throughput: 0: 12065.0. Samples: 9915296. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:21,169][03423] Avg episode reward: [(0, '56.604')] -[2024-07-05 15:09:21,729][03976] Updated weights for policy 0, policy_version 62235 (0.0008) -[2024-07-05 15:09:23,427][03976] Updated weights for policy 0, policy_version 62245 (0.0008) -[2024-07-05 15:09:25,108][03976] Updated weights for policy 0, policy_version 62255 (0.0008) -[2024-07-05 15:09:26,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 490037248. Throughput: 0: 12062.4. Samples: 9987708. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:26,169][03423] Avg episode reward: [(0, '55.753')] -[2024-07-05 15:09:26,797][03976] Updated weights for policy 0, policy_version 62265 (0.0008) -[2024-07-05 15:09:28,494][03976] Updated weights for policy 0, policy_version 62275 (0.0007) -[2024-07-05 15:09:30,186][03976] Updated weights for policy 0, policy_version 62285 (0.0008) -[2024-07-05 15:09:31,168][03423] Fps is (10 sec: 47513.4, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 490274816. Throughput: 0: 12058.9. Samples: 10060068. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:31,169][03423] Avg episode reward: [(0, '53.403')] -[2024-07-05 15:09:31,898][03976] Updated weights for policy 0, policy_version 62295 (0.0007) -[2024-07-05 15:09:33,597][03976] Updated weights for policy 0, policy_version 62305 (0.0007) -[2024-07-05 15:09:35,285][03976] Updated weights for policy 0, policy_version 62315 (0.0008) -[2024-07-05 15:09:36,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48332.7, 300 sec: 48235.6). Total num frames: 490520576. Throughput: 0: 12050.3. Samples: 10096144. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:36,168][03423] Avg episode reward: [(0, '56.069')] -[2024-07-05 15:09:36,973][03976] Updated weights for policy 0, policy_version 62325 (0.0008) -[2024-07-05 15:09:38,696][03976] Updated weights for policy 0, policy_version 62335 (0.0009) -[2024-07-05 15:09:40,397][03976] Updated weights for policy 0, policy_version 62345 (0.0008) -[2024-07-05 15:09:41,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.1, 300 sec: 48235.6). Total num frames: 490758144. Throughput: 0: 12047.6. Samples: 10168188. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:41,169][03423] Avg episode reward: [(0, '52.681')] -[2024-07-05 15:09:42,063][03976] Updated weights for policy 0, policy_version 62355 (0.0007) -[2024-07-05 15:09:43,793][03976] Updated weights for policy 0, policy_version 62365 (0.0010) -[2024-07-05 15:09:45,506][03976] Updated weights for policy 0, policy_version 62375 (0.0008) -[2024-07-05 15:09:46,168][03423] Fps is (10 sec: 47513.7, 60 sec: 48196.2, 300 sec: 48207.9). Total num frames: 490995712. Throughput: 0: 12040.2. Samples: 10240524. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:46,169][03423] Avg episode reward: [(0, '54.373')] -[2024-07-05 15:09:47,191][03976] Updated weights for policy 0, policy_version 62385 (0.0008) -[2024-07-05 15:09:48,889][03976] Updated weights for policy 0, policy_version 62395 (0.0009) -[2024-07-05 15:09:50,584][03976] Updated weights for policy 0, policy_version 62405 (0.0008) -[2024-07-05 15:09:51,168][03423] Fps is (10 sec: 48333.4, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 491241472. Throughput: 0: 12050.0. Samples: 10276896. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:51,168][03423] Avg episode reward: [(0, '55.625')] -[2024-07-05 15:09:52,304][03976] Updated weights for policy 0, policy_version 62415 (0.0009) -[2024-07-05 15:09:54,005][03976] Updated weights for policy 0, policy_version 62425 (0.0007) -[2024-07-05 15:09:55,728][03976] Updated weights for policy 0, policy_version 62435 (0.0008) -[2024-07-05 15:09:56,168][03423] Fps is (10 sec: 48332.0, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 491479040. Throughput: 0: 12039.8. Samples: 10349104. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:09:56,169][03423] Avg episode reward: [(0, '54.838')] -[2024-07-05 15:09:57,381][03976] Updated weights for policy 0, policy_version 62445 (0.0007) -[2024-07-05 15:09:59,092][03976] Updated weights for policy 0, policy_version 62455 (0.0009) -[2024-07-05 15:10:00,769][03976] Updated weights for policy 0, policy_version 62465 (0.0007) -[2024-07-05 15:10:01,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 491724800. Throughput: 0: 12043.5. Samples: 10421628. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:10:01,169][03423] Avg episode reward: [(0, '55.188')] -[2024-07-05 15:10:02,486][03976] Updated weights for policy 0, policy_version 62475 (0.0007) -[2024-07-05 15:10:04,180][03976] Updated weights for policy 0, policy_version 62485 (0.0007) -[2024-07-05 15:10:05,867][03976] Updated weights for policy 0, policy_version 62495 (0.0007) -[2024-07-05 15:10:06,168][03423] Fps is (10 sec: 48333.8, 60 sec: 48196.2, 300 sec: 48207.9). Total num frames: 491962368. Throughput: 0: 12051.6. Samples: 10457620. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:10:06,169][03423] Avg episode reward: [(0, '53.379')] -[2024-07-05 15:10:06,211][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000062497_491970560.pth... -[2024-07-05 15:10:06,276][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000061084_480395264.pth -[2024-07-05 15:10:07,580][03976] Updated weights for policy 0, policy_version 62505 (0.0008) -[2024-07-05 15:10:09,307][03976] Updated weights for policy 0, policy_version 62515 (0.0008) -[2024-07-05 15:10:11,001][03976] Updated weights for policy 0, policy_version 62525 (0.0008) -[2024-07-05 15:10:11,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 492208128. Throughput: 0: 12054.0. Samples: 10530136. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:10:11,168][03423] Avg episode reward: [(0, '54.379')] -[2024-07-05 15:10:12,687][03976] Updated weights for policy 0, policy_version 62535 (0.0008) -[2024-07-05 15:10:14,380][03976] Updated weights for policy 0, policy_version 62545 (0.0009) -[2024-07-05 15:10:16,087][03976] Updated weights for policy 0, policy_version 62555 (0.0010) -[2024-07-05 15:10:16,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 492445696. Throughput: 0: 12055.1. Samples: 10602548. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:10:16,169][03423] Avg episode reward: [(0, '54.567')] -[2024-07-05 15:10:17,786][03976] Updated weights for policy 0, policy_version 62565 (0.0008) -[2024-07-05 15:10:19,458][03976] Updated weights for policy 0, policy_version 62575 (0.0008) -[2024-07-05 15:10:21,127][03976] Updated weights for policy 0, policy_version 62585 (0.0009) -[2024-07-05 15:10:21,168][03423] Fps is (10 sec: 48332.2, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 492691456. Throughput: 0: 12058.1. Samples: 10638760. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:10:21,169][03423] Avg episode reward: [(0, '53.577')] -[2024-07-05 15:10:22,881][03976] Updated weights for policy 0, policy_version 62595 (0.0008) -[2024-07-05 15:10:24,547][03976] Updated weights for policy 0, policy_version 62605 (0.0008) -[2024-07-05 15:10:26,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 492929024. Throughput: 0: 12062.3. Samples: 10710992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:26,169][03423] Avg episode reward: [(0, '53.683')] -[2024-07-05 15:10:26,282][03976] Updated weights for policy 0, policy_version 62615 (0.0008) -[2024-07-05 15:10:27,986][03976] Updated weights for policy 0, policy_version 62625 (0.0008) -[2024-07-05 15:10:29,704][03976] Updated weights for policy 0, policy_version 62635 (0.0007) -[2024-07-05 15:10:31,167][03423] Fps is (10 sec: 47514.3, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 493166592. Throughput: 0: 12057.0. Samples: 10783088. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:31,168][03423] Avg episode reward: [(0, '58.179')] -[2024-07-05 15:10:31,186][03956] Saving new best policy, reward=58.179! -[2024-07-05 15:10:31,364][03976] Updated weights for policy 0, policy_version 62645 (0.0007) -[2024-07-05 15:10:33,066][03976] Updated weights for policy 0, policy_version 62655 (0.0007) -[2024-07-05 15:10:34,778][03976] Updated weights for policy 0, policy_version 62665 (0.0008) -[2024-07-05 15:10:36,168][03423] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 493412352. Throughput: 0: 12049.0. Samples: 10819100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:36,169][03423] Avg episode reward: [(0, '52.506')] -[2024-07-05 15:10:36,521][03976] Updated weights for policy 0, policy_version 62675 (0.0007) -[2024-07-05 15:10:38,208][03976] Updated weights for policy 0, policy_version 62685 (0.0007) -[2024-07-05 15:10:39,924][03976] Updated weights for policy 0, policy_version 62695 (0.0008) -[2024-07-05 15:10:41,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 493649920. Throughput: 0: 12051.8. Samples: 10891436. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:41,169][03423] Avg episode reward: [(0, '50.890')] -[2024-07-05 15:10:41,589][03976] Updated weights for policy 0, policy_version 62705 (0.0008) -[2024-07-05 15:10:43,286][03976] Updated weights for policy 0, policy_version 62715 (0.0008) -[2024-07-05 15:10:44,967][03976] Updated weights for policy 0, policy_version 62725 (0.0008) -[2024-07-05 15:10:46,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 493887488. Throughput: 0: 12041.9. Samples: 10963516. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:46,169][03423] Avg episode reward: [(0, '53.749')] -[2024-07-05 15:10:46,676][03976] Updated weights for policy 0, policy_version 62735 (0.0008) -[2024-07-05 15:10:48,382][03976] Updated weights for policy 0, policy_version 62745 (0.0008) -[2024-07-05 15:10:50,100][03976] Updated weights for policy 0, policy_version 62755 (0.0010) -[2024-07-05 15:10:51,168][03423] Fps is (10 sec: 48333.4, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 494133248. Throughput: 0: 12051.7. Samples: 10999948. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:51,169][03423] Avg episode reward: [(0, '54.441')] -[2024-07-05 15:10:51,788][03976] Updated weights for policy 0, policy_version 62765 (0.0008) -[2024-07-05 15:10:53,492][03976] Updated weights for policy 0, policy_version 62775 (0.0008) -[2024-07-05 15:10:55,214][03976] Updated weights for policy 0, policy_version 62785 (0.0011) -[2024-07-05 15:10:56,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 494370816. Throughput: 0: 12036.2. Samples: 11071764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:10:56,169][03423] Avg episode reward: [(0, '55.017')] -[2024-07-05 15:10:56,911][03976] Updated weights for policy 0, policy_version 62795 (0.0007) -[2024-07-05 15:10:58,640][03976] Updated weights for policy 0, policy_version 62805 (0.0007) -[2024-07-05 15:11:00,328][03976] Updated weights for policy 0, policy_version 62815 (0.0008) -[2024-07-05 15:11:01,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.2, 300 sec: 48207.9). Total num frames: 494616576. Throughput: 0: 12030.2. Samples: 11143908. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:11:01,169][03423] Avg episode reward: [(0, '54.722')] -[2024-07-05 15:11:02,015][03976] Updated weights for policy 0, policy_version 62825 (0.0011) -[2024-07-05 15:11:03,697][03976] Updated weights for policy 0, policy_version 62835 (0.0007) -[2024-07-05 15:11:05,401][03976] Updated weights for policy 0, policy_version 62845 (0.0008) -[2024-07-05 15:11:06,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 494854144. Throughput: 0: 12031.9. Samples: 11180192. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:06,168][03423] Avg episode reward: [(0, '52.284')] -[2024-07-05 15:11:07,085][03976] Updated weights for policy 0, policy_version 62855 (0.0008) -[2024-07-05 15:11:08,770][03976] Updated weights for policy 0, policy_version 62865 (0.0007) -[2024-07-05 15:11:10,498][03976] Updated weights for policy 0, policy_version 62875 (0.0008) -[2024-07-05 15:11:11,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 495091712. Throughput: 0: 12034.5. Samples: 11252544. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:11,169][03423] Avg episode reward: [(0, '56.108')] -[2024-07-05 15:11:12,184][03976] Updated weights for policy 0, policy_version 62885 (0.0010) -[2024-07-05 15:11:13,915][03976] Updated weights for policy 0, policy_version 62895 (0.0010) -[2024-07-05 15:11:15,635][03976] Updated weights for policy 0, policy_version 62905 (0.0007) -[2024-07-05 15:11:16,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 495337472. Throughput: 0: 12039.5. Samples: 11324868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:16,169][03423] Avg episode reward: [(0, '54.731')] -[2024-07-05 15:11:17,328][03976] Updated weights for policy 0, policy_version 62915 (0.0013) -[2024-07-05 15:11:18,987][03976] Updated weights for policy 0, policy_version 62925 (0.0009) -[2024-07-05 15:11:20,645][03976] Updated weights for policy 0, policy_version 62935 (0.0007) -[2024-07-05 15:11:21,168][03423] Fps is (10 sec: 48332.0, 60 sec: 48059.7, 300 sec: 48207.8). Total num frames: 495575040. Throughput: 0: 12035.8. Samples: 11360712. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:21,169][03423] Avg episode reward: [(0, '54.804')] -[2024-07-05 15:11:22,393][03976] Updated weights for policy 0, policy_version 62945 (0.0010) -[2024-07-05 15:11:24,058][03976] Updated weights for policy 0, policy_version 62955 (0.0008) -[2024-07-05 15:11:25,742][03976] Updated weights for policy 0, policy_version 62965 (0.0007) -[2024-07-05 15:11:26,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 495820800. Throughput: 0: 12048.8. Samples: 11433632. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:26,169][03423] Avg episode reward: [(0, '53.748')] -[2024-07-05 15:11:27,435][03976] Updated weights for policy 0, policy_version 62975 (0.0008) -[2024-07-05 15:11:29,135][03976] Updated weights for policy 0, policy_version 62985 (0.0008) -[2024-07-05 15:11:30,856][03976] Updated weights for policy 0, policy_version 62995 (0.0008) -[2024-07-05 15:11:31,168][03423] Fps is (10 sec: 48333.6, 60 sec: 48196.2, 300 sec: 48207.9). Total num frames: 496058368. Throughput: 0: 12057.5. Samples: 11506104. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:31,169][03423] Avg episode reward: [(0, '54.722')] -[2024-07-05 15:11:32,557][03976] Updated weights for policy 0, policy_version 63005 (0.0010) -[2024-07-05 15:11:34,285][03976] Updated weights for policy 0, policy_version 63015 (0.0010) -[2024-07-05 15:11:35,959][03976] Updated weights for policy 0, policy_version 63025 (0.0011) -[2024-07-05 15:11:36,168][03423] Fps is (10 sec: 48333.2, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 496304128. Throughput: 0: 12056.7. Samples: 11542500. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:36,169][03423] Avg episode reward: [(0, '52.422')] -[2024-07-05 15:11:37,686][03976] Updated weights for policy 0, policy_version 63035 (0.0008) -[2024-07-05 15:11:39,347][03976] Updated weights for policy 0, policy_version 63045 (0.0007) -[2024-07-05 15:11:41,033][03976] Updated weights for policy 0, policy_version 63055 (0.0007) -[2024-07-05 15:11:41,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 496541696. Throughput: 0: 12054.7. Samples: 11614224. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:11:41,169][03423] Avg episode reward: [(0, '54.314')] -[2024-07-05 15:11:42,735][03976] Updated weights for policy 0, policy_version 63065 (0.0008) -[2024-07-05 15:11:44,430][03976] Updated weights for policy 0, policy_version 63075 (0.0008) -[2024-07-05 15:11:46,144][03976] Updated weights for policy 0, policy_version 63085 (0.0007) -[2024-07-05 15:11:46,167][03423] Fps is (10 sec: 48333.5, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 496787456. Throughput: 0: 12062.6. Samples: 11686724. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:11:46,169][03423] Avg episode reward: [(0, '55.428')] -[2024-07-05 15:11:47,833][03976] Updated weights for policy 0, policy_version 63095 (0.0008) -[2024-07-05 15:11:49,519][03976] Updated weights for policy 0, policy_version 63105 (0.0008) -[2024-07-05 15:11:51,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 497025024. Throughput: 0: 12066.1. Samples: 11723168. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:11:51,168][03423] Avg episode reward: [(0, '56.730')] -[2024-07-05 15:11:51,241][03976] Updated weights for policy 0, policy_version 63115 (0.0007) -[2024-07-05 15:11:52,940][03976] Updated weights for policy 0, policy_version 63125 (0.0009) -[2024-07-05 15:11:54,680][03976] Updated weights for policy 0, policy_version 63135 (0.0008) -[2024-07-05 15:11:56,167][03423] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 497270784. Throughput: 0: 12068.3. Samples: 11795616. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:11:56,168][03423] Avg episode reward: [(0, '55.142')] -[2024-07-05 15:11:56,335][03976] Updated weights for policy 0, policy_version 63145 (0.0008) -[2024-07-05 15:11:58,041][03976] Updated weights for policy 0, policy_version 63155 (0.0008) -[2024-07-05 15:11:59,690][03976] Updated weights for policy 0, policy_version 63165 (0.0008) -[2024-07-05 15:12:01,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 497508352. Throughput: 0: 12069.4. Samples: 11867992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:12:01,169][03423] Avg episode reward: [(0, '53.514')] -[2024-07-05 15:12:01,385][03976] Updated weights for policy 0, policy_version 63175 (0.0010) -[2024-07-05 15:12:03,080][03976] Updated weights for policy 0, policy_version 63185 (0.0008) -[2024-07-05 15:12:04,793][03976] Updated weights for policy 0, policy_version 63195 (0.0007) -[2024-07-05 15:12:06,167][03423] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 497745920. Throughput: 0: 12078.8. Samples: 11904256. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:12:06,168][03423] Avg episode reward: [(0, '55.977')] -[2024-07-05 15:12:06,187][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000063203_497754112.pth... -[2024-07-05 15:12:06,256][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000061790_486178816.pth -[2024-07-05 15:12:06,535][03976] Updated weights for policy 0, policy_version 63205 (0.0008) -[2024-07-05 15:12:08,221][03976] Updated weights for policy 0, policy_version 63215 (0.0008) -[2024-07-05 15:12:09,934][03976] Updated weights for policy 0, policy_version 63225 (0.0008) -[2024-07-05 15:12:11,168][03423] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 497991680. Throughput: 0: 12066.8. Samples: 11976636. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:12:11,169][03423] Avg episode reward: [(0, '56.336')] -[2024-07-05 15:12:11,627][03976] Updated weights for policy 0, policy_version 63235 (0.0010) -[2024-07-05 15:12:13,325][03976] Updated weights for policy 0, policy_version 63245 (0.0010) -[2024-07-05 15:12:14,995][03976] Updated weights for policy 0, policy_version 63255 (0.0007) -[2024-07-05 15:12:16,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 498229248. Throughput: 0: 12049.3. Samples: 12048324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:12:16,169][03423] Avg episode reward: [(0, '54.090')] -[2024-07-05 15:12:16,717][03976] Updated weights for policy 0, policy_version 63265 (0.0007) -[2024-07-05 15:12:18,405][03976] Updated weights for policy 0, policy_version 63275 (0.0007) -[2024-07-05 15:12:20,114][03976] Updated weights for policy 0, policy_version 63285 (0.0008) -[2024-07-05 15:12:21,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48332.9, 300 sec: 48207.9). Total num frames: 498475008. Throughput: 0: 12057.6. Samples: 12085092. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:21,169][03423] Avg episode reward: [(0, '55.891')] -[2024-07-05 15:12:21,806][03976] Updated weights for policy 0, policy_version 63295 (0.0008) -[2024-07-05 15:12:23,507][03976] Updated weights for policy 0, policy_version 63305 (0.0007) -[2024-07-05 15:12:25,187][03976] Updated weights for policy 0, policy_version 63315 (0.0008) -[2024-07-05 15:12:26,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.4, 300 sec: 48207.8). Total num frames: 498712576. Throughput: 0: 12069.9. Samples: 12157368. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:26,169][03423] Avg episode reward: [(0, '55.394')] -[2024-07-05 15:12:26,860][03976] Updated weights for policy 0, policy_version 63325 (0.0007) -[2024-07-05 15:12:28,582][03976] Updated weights for policy 0, policy_version 63335 (0.0010) -[2024-07-05 15:12:30,297][03976] Updated weights for policy 0, policy_version 63345 (0.0008) -[2024-07-05 15:12:31,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 498950144. Throughput: 0: 12059.2. Samples: 12229388. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:31,169][03423] Avg episode reward: [(0, '55.423')] -[2024-07-05 15:12:31,974][03976] Updated weights for policy 0, policy_version 63355 (0.0008) -[2024-07-05 15:12:33,710][03976] Updated weights for policy 0, policy_version 63365 (0.0010) -[2024-07-05 15:12:35,413][03976] Updated weights for policy 0, policy_version 63375 (0.0010) -[2024-07-05 15:12:36,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 499195904. Throughput: 0: 12050.8. Samples: 12265456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:36,170][03423] Avg episode reward: [(0, '54.992')] -[2024-07-05 15:12:37,099][03976] Updated weights for policy 0, policy_version 63385 (0.0010) -[2024-07-05 15:12:38,791][03976] Updated weights for policy 0, policy_version 63395 (0.0008) -[2024-07-05 15:12:40,503][03976] Updated weights for policy 0, policy_version 63405 (0.0007) -[2024-07-05 15:12:41,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 499433472. Throughput: 0: 12057.1. Samples: 12338188. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:41,169][03423] Avg episode reward: [(0, '54.861')] -[2024-07-05 15:12:42,195][03976] Updated weights for policy 0, policy_version 63415 (0.0008) -[2024-07-05 15:12:43,896][03976] Updated weights for policy 0, policy_version 63425 (0.0008) -[2024-07-05 15:12:45,582][03976] Updated weights for policy 0, policy_version 63435 (0.0010) -[2024-07-05 15:12:46,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 499679232. Throughput: 0: 12052.4. Samples: 12410352. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:46,169][03423] Avg episode reward: [(0, '56.215')] -[2024-07-05 15:12:47,308][03976] Updated weights for policy 0, policy_version 63445 (0.0007) -[2024-07-05 15:12:49,051][03976] Updated weights for policy 0, policy_version 63455 (0.0008) -[2024-07-05 15:12:50,779][03976] Updated weights for policy 0, policy_version 63465 (0.0007) -[2024-07-05 15:12:51,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 499916800. Throughput: 0: 12048.0. Samples: 12446420. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:51,169][03423] Avg episode reward: [(0, '55.390')] -[2024-07-05 15:12:52,460][03976] Updated weights for policy 0, policy_version 63475 (0.0007) -[2024-07-05 15:12:54,132][03976] Updated weights for policy 0, policy_version 63485 (0.0007) -[2024-07-05 15:12:55,776][03976] Updated weights for policy 0, policy_version 63495 (0.0007) -[2024-07-05 15:12:56,168][03423] Fps is (10 sec: 48333.1, 60 sec: 48196.2, 300 sec: 48235.6). Total num frames: 500162560. Throughput: 0: 12044.0. Samples: 12518616. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:12:56,169][03423] Avg episode reward: [(0, '55.728')] -[2024-07-05 15:12:57,512][03976] Updated weights for policy 0, policy_version 63505 (0.0008) -[2024-07-05 15:12:59,183][03976] Updated weights for policy 0, policy_version 63515 (0.0008) -[2024-07-05 15:13:00,893][03976] Updated weights for policy 0, policy_version 63525 (0.0008) -[2024-07-05 15:13:01,167][03423] Fps is (10 sec: 48333.5, 60 sec: 48196.4, 300 sec: 48207.9). Total num frames: 500400128. Throughput: 0: 12061.8. Samples: 12591104. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:01,169][03423] Avg episode reward: [(0, '54.415')] -[2024-07-05 15:13:02,590][03976] Updated weights for policy 0, policy_version 63535 (0.0007) -[2024-07-05 15:13:04,304][03976] Updated weights for policy 0, policy_version 63545 (0.0009) -[2024-07-05 15:13:05,978][03976] Updated weights for policy 0, policy_version 63555 (0.0010) -[2024-07-05 15:13:06,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 500645888. Throughput: 0: 12053.1. Samples: 12627480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:06,169][03423] Avg episode reward: [(0, '52.832')] -[2024-07-05 15:13:07,674][03976] Updated weights for policy 0, policy_version 63565 (0.0008) -[2024-07-05 15:13:09,420][03976] Updated weights for policy 0, policy_version 63575 (0.0008) -[2024-07-05 15:13:11,142][03976] Updated weights for policy 0, policy_version 63585 (0.0008) -[2024-07-05 15:13:11,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 500883456. Throughput: 0: 12053.2. Samples: 12699760. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:11,168][03423] Avg episode reward: [(0, '55.619')] -[2024-07-05 15:13:12,802][03976] Updated weights for policy 0, policy_version 63595 (0.0011) -[2024-07-05 15:13:14,503][03976] Updated weights for policy 0, policy_version 63605 (0.0007) -[2024-07-05 15:13:16,168][03423] Fps is (10 sec: 47513.2, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 501121024. Throughput: 0: 12049.3. Samples: 12771608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:16,168][03423] Avg episode reward: [(0, '54.183')] -[2024-07-05 15:13:16,170][03976] Updated weights for policy 0, policy_version 63615 (0.0007) -[2024-07-05 15:13:17,898][03976] Updated weights for policy 0, policy_version 63625 (0.0008) -[2024-07-05 15:13:19,586][03976] Updated weights for policy 0, policy_version 63635 (0.0008) -[2024-07-05 15:13:21,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48235.6). Total num frames: 501366784. Throughput: 0: 12056.5. Samples: 12807996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:21,169][03423] Avg episode reward: [(0, '55.388')] -[2024-07-05 15:13:21,310][03976] Updated weights for policy 0, policy_version 63645 (0.0008) -[2024-07-05 15:13:22,967][03976] Updated weights for policy 0, policy_version 63655 (0.0008) -[2024-07-05 15:13:24,695][03976] Updated weights for policy 0, policy_version 63665 (0.0008) -[2024-07-05 15:13:26,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 501604352. Throughput: 0: 12047.0. Samples: 12880304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:26,169][03423] Avg episode reward: [(0, '56.470')] -[2024-07-05 15:13:26,364][03976] Updated weights for policy 0, policy_version 63675 (0.0009) -[2024-07-05 15:13:28,077][03976] Updated weights for policy 0, policy_version 63685 (0.0008) -[2024-07-05 15:13:29,789][03976] Updated weights for policy 0, policy_version 63695 (0.0008) -[2024-07-05 15:13:31,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 48235.6). Total num frames: 501850112. Throughput: 0: 12045.7. Samples: 12952408. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:31,169][03423] Avg episode reward: [(0, '55.626')] -[2024-07-05 15:13:31,477][03976] Updated weights for policy 0, policy_version 63705 (0.0008) -[2024-07-05 15:13:33,168][03976] Updated weights for policy 0, policy_version 63715 (0.0008) -[2024-07-05 15:13:34,905][03976] Updated weights for policy 0, policy_version 63725 (0.0007) -[2024-07-05 15:13:36,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 502087680. Throughput: 0: 12046.0. Samples: 12988488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:13:36,169][03423] Avg episode reward: [(0, '53.110')] -[2024-07-05 15:13:36,622][03976] Updated weights for policy 0, policy_version 63735 (0.0008) -[2024-07-05 15:13:38,345][03976] Updated weights for policy 0, policy_version 63745 (0.0008) -[2024-07-05 15:13:40,050][03976] Updated weights for policy 0, policy_version 63755 (0.0008) -[2024-07-05 15:13:41,168][03423] Fps is (10 sec: 47513.3, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 502325248. Throughput: 0: 12043.8. Samples: 13060588. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:13:41,169][03423] Avg episode reward: [(0, '53.767')] -[2024-07-05 15:13:41,742][03976] Updated weights for policy 0, policy_version 63765 (0.0008) -[2024-07-05 15:13:43,421][03976] Updated weights for policy 0, policy_version 63775 (0.0008) -[2024-07-05 15:13:45,154][03976] Updated weights for policy 0, policy_version 63785 (0.0008) -[2024-07-05 15:13:46,168][03423] Fps is (10 sec: 47513.9, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 502562816. Throughput: 0: 12029.3. Samples: 13132424. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:13:46,168][03423] Avg episode reward: [(0, '54.821')] -[2024-07-05 15:13:46,837][03976] Updated weights for policy 0, policy_version 63795 (0.0008) -[2024-07-05 15:13:48,540][03976] Updated weights for policy 0, policy_version 63805 (0.0008) -[2024-07-05 15:13:50,255][03976] Updated weights for policy 0, policy_version 63815 (0.0008) -[2024-07-05 15:13:51,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 502808576. Throughput: 0: 12030.5. Samples: 13168852. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:13:51,169][03423] Avg episode reward: [(0, '55.139')] -[2024-07-05 15:13:51,932][03976] Updated weights for policy 0, policy_version 63825 (0.0007) -[2024-07-05 15:13:53,657][03976] Updated weights for policy 0, policy_version 63835 (0.0008) -[2024-07-05 15:13:55,369][03976] Updated weights for policy 0, policy_version 63845 (0.0007) -[2024-07-05 15:13:56,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48059.7, 300 sec: 48180.0). Total num frames: 503046144. Throughput: 0: 12023.7. Samples: 13240828. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:13:56,169][03423] Avg episode reward: [(0, '52.723')] -[2024-07-05 15:13:57,072][03976] Updated weights for policy 0, policy_version 63855 (0.0007) -[2024-07-05 15:13:58,764][03976] Updated weights for policy 0, policy_version 63865 (0.0008) -[2024-07-05 15:14:00,444][03976] Updated weights for policy 0, policy_version 63875 (0.0010) -[2024-07-05 15:14:01,167][03423] Fps is (10 sec: 48333.1, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 503291904. Throughput: 0: 12033.5. Samples: 13313116. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:14:01,169][03423] Avg episode reward: [(0, '54.511')] -[2024-07-05 15:14:02,159][03976] Updated weights for policy 0, policy_version 63885 (0.0007) -[2024-07-05 15:14:03,868][03976] Updated weights for policy 0, policy_version 63895 (0.0007) -[2024-07-05 15:14:05,549][03976] Updated weights for policy 0, policy_version 63905 (0.0008) -[2024-07-05 15:14:06,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 503529472. Throughput: 0: 12025.1. Samples: 13349124. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:14:06,169][03423] Avg episode reward: [(0, '55.466')] -[2024-07-05 15:14:06,172][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000063908_503529472.pth... -[2024-07-05 15:14:06,244][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000062497_491970560.pth -[2024-07-05 15:14:07,330][03976] Updated weights for policy 0, policy_version 63915 (0.0012) -[2024-07-05 15:14:09,006][03976] Updated weights for policy 0, policy_version 63925 (0.0008) -[2024-07-05 15:14:10,708][03976] Updated weights for policy 0, policy_version 63935 (0.0009) -[2024-07-05 15:14:11,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 503767040. Throughput: 0: 12014.9. Samples: 13420972. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:14:11,169][03423] Avg episode reward: [(0, '54.395')] -[2024-07-05 15:14:12,408][03976] Updated weights for policy 0, policy_version 63945 (0.0008) -[2024-07-05 15:14:14,127][03976] Updated weights for policy 0, policy_version 63955 (0.0008) -[2024-07-05 15:14:15,831][03976] Updated weights for policy 0, policy_version 63965 (0.0008) -[2024-07-05 15:14:16,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 504004608. Throughput: 0: 12020.3. Samples: 13493324. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:14:16,168][03423] Avg episode reward: [(0, '52.897')] -[2024-07-05 15:14:17,507][03976] Updated weights for policy 0, policy_version 63975 (0.0010) -[2024-07-05 15:14:19,204][03976] Updated weights for policy 0, policy_version 63985 (0.0008) -[2024-07-05 15:14:20,866][03976] Updated weights for policy 0, policy_version 63995 (0.0008) -[2024-07-05 15:14:21,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 504250368. Throughput: 0: 12020.0. Samples: 13529388. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:21,169][03423] Avg episode reward: [(0, '55.078')] -[2024-07-05 15:14:22,574][03976] Updated weights for policy 0, policy_version 64005 (0.0007) -[2024-07-05 15:14:24,307][03976] Updated weights for policy 0, policy_version 64015 (0.0008) -[2024-07-05 15:14:25,983][03976] Updated weights for policy 0, policy_version 64025 (0.0007) -[2024-07-05 15:14:26,168][03423] Fps is (10 sec: 49151.3, 60 sec: 48196.1, 300 sec: 48207.8). Total num frames: 504496128. Throughput: 0: 12028.2. Samples: 13601860. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:26,169][03423] Avg episode reward: [(0, '53.699')] -[2024-07-05 15:14:27,667][03976] Updated weights for policy 0, policy_version 64035 (0.0008) -[2024-07-05 15:14:29,377][03976] Updated weights for policy 0, policy_version 64045 (0.0008) -[2024-07-05 15:14:31,070][03976] Updated weights for policy 0, policy_version 64055 (0.0007) -[2024-07-05 15:14:31,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48059.6, 300 sec: 48180.1). Total num frames: 504733696. Throughput: 0: 12042.2. Samples: 13674324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:31,169][03423] Avg episode reward: [(0, '55.690')] -[2024-07-05 15:14:32,785][03976] Updated weights for policy 0, policy_version 64065 (0.0008) -[2024-07-05 15:14:34,478][03976] Updated weights for policy 0, policy_version 64075 (0.0008) -[2024-07-05 15:14:36,167][03423] Fps is (10 sec: 47514.8, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 504971264. Throughput: 0: 12036.2. Samples: 13710480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:36,168][03423] Avg episode reward: [(0, '55.837')] -[2024-07-05 15:14:36,176][03976] Updated weights for policy 0, policy_version 64085 (0.0008) -[2024-07-05 15:14:37,856][03976] Updated weights for policy 0, policy_version 64095 (0.0008) -[2024-07-05 15:14:39,591][03976] Updated weights for policy 0, policy_version 64105 (0.0007) -[2024-07-05 15:14:41,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 505217024. Throughput: 0: 12042.3. Samples: 13782732. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:41,169][03423] Avg episode reward: [(0, '55.355')] -[2024-07-05 15:14:41,278][03976] Updated weights for policy 0, policy_version 64115 (0.0008) -[2024-07-05 15:14:42,984][03976] Updated weights for policy 0, policy_version 64125 (0.0008) -[2024-07-05 15:14:44,673][03976] Updated weights for policy 0, policy_version 64135 (0.0008) -[2024-07-05 15:14:46,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 505454592. Throughput: 0: 12037.6. Samples: 13854808. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:46,168][03423] Avg episode reward: [(0, '55.232')] -[2024-07-05 15:14:46,364][03976] Updated weights for policy 0, policy_version 64145 (0.0010) -[2024-07-05 15:14:48,063][03976] Updated weights for policy 0, policy_version 64155 (0.0007) -[2024-07-05 15:14:49,766][03976] Updated weights for policy 0, policy_version 64165 (0.0007) -[2024-07-05 15:14:51,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 505700352. Throughput: 0: 12040.3. Samples: 13890940. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:51,168][03423] Avg episode reward: [(0, '54.660')] -[2024-07-05 15:14:51,518][03976] Updated weights for policy 0, policy_version 64175 (0.0008) -[2024-07-05 15:14:53,218][03976] Updated weights for policy 0, policy_version 64185 (0.0008) -[2024-07-05 15:14:54,926][03976] Updated weights for policy 0, policy_version 64195 (0.0008) -[2024-07-05 15:14:56,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 505937920. Throughput: 0: 12049.4. Samples: 13963196. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:14:56,169][03423] Avg episode reward: [(0, '55.059')] -[2024-07-05 15:14:56,605][03976] Updated weights for policy 0, policy_version 64205 (0.0008) -[2024-07-05 15:14:58,321][03976] Updated weights for policy 0, policy_version 64215 (0.0008) -[2024-07-05 15:15:00,032][03976] Updated weights for policy 0, policy_version 64225 (0.0008) -[2024-07-05 15:15:01,167][03423] Fps is (10 sec: 47514.2, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 506175488. Throughput: 0: 12043.1. Samples: 14035260. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:01,168][03423] Avg episode reward: [(0, '54.999')] -[2024-07-05 15:15:01,700][03976] Updated weights for policy 0, policy_version 64235 (0.0007) -[2024-07-05 15:15:03,413][03976] Updated weights for policy 0, policy_version 64245 (0.0008) -[2024-07-05 15:15:05,124][03976] Updated weights for policy 0, policy_version 64255 (0.0007) -[2024-07-05 15:15:06,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 506421248. Throughput: 0: 12053.1. Samples: 14071776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:06,169][03423] Avg episode reward: [(0, '52.713')] -[2024-07-05 15:15:06,795][03976] Updated weights for policy 0, policy_version 64265 (0.0010) -[2024-07-05 15:15:08,485][03976] Updated weights for policy 0, policy_version 64275 (0.0008) -[2024-07-05 15:15:10,185][03976] Updated weights for policy 0, policy_version 64285 (0.0008) -[2024-07-05 15:15:11,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 506658816. Throughput: 0: 12045.6. Samples: 14143912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:11,169][03423] Avg episode reward: [(0, '55.383')] -[2024-07-05 15:15:11,956][03976] Updated weights for policy 0, policy_version 64295 (0.0008) -[2024-07-05 15:15:13,626][03976] Updated weights for policy 0, policy_version 64305 (0.0010) -[2024-07-05 15:15:15,309][03976] Updated weights for policy 0, policy_version 64315 (0.0008) -[2024-07-05 15:15:16,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 48180.1). Total num frames: 506904576. Throughput: 0: 12039.1. Samples: 14216080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:16,169][03423] Avg episode reward: [(0, '54.133')] -[2024-07-05 15:15:16,943][03976] Updated weights for policy 0, policy_version 64325 (0.0008) -[2024-07-05 15:15:18,661][03976] Updated weights for policy 0, policy_version 64335 (0.0009) -[2024-07-05 15:15:20,375][03976] Updated weights for policy 0, policy_version 64345 (0.0010) -[2024-07-05 15:15:21,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 507142144. Throughput: 0: 12041.8. Samples: 14252360. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:21,169][03423] Avg episode reward: [(0, '55.022')] -[2024-07-05 15:15:22,080][03976] Updated weights for policy 0, policy_version 64355 (0.0008) -[2024-07-05 15:15:23,771][03976] Updated weights for policy 0, policy_version 64365 (0.0007) -[2024-07-05 15:15:25,478][03976] Updated weights for policy 0, policy_version 64375 (0.0008) -[2024-07-05 15:15:26,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48196.5, 300 sec: 48207.8). Total num frames: 507387904. Throughput: 0: 12041.6. Samples: 14324604. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:26,168][03423] Avg episode reward: [(0, '53.836')] -[2024-07-05 15:15:27,188][03976] Updated weights for policy 0, policy_version 64385 (0.0008) -[2024-07-05 15:15:28,905][03976] Updated weights for policy 0, policy_version 64395 (0.0008) -[2024-07-05 15:15:30,596][03976] Updated weights for policy 0, policy_version 64405 (0.0008) -[2024-07-05 15:15:31,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 507625472. Throughput: 0: 12048.9. Samples: 14397008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:31,169][03423] Avg episode reward: [(0, '55.105')] -[2024-07-05 15:15:32,286][03976] Updated weights for policy 0, policy_version 64415 (0.0008) -[2024-07-05 15:15:34,029][03976] Updated weights for policy 0, policy_version 64425 (0.0010) -[2024-07-05 15:15:35,733][03976] Updated weights for policy 0, policy_version 64435 (0.0007) -[2024-07-05 15:15:36,167][03423] Fps is (10 sec: 47513.5, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 507863040. Throughput: 0: 12047.6. Samples: 14433080. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:15:36,168][03423] Avg episode reward: [(0, '53.302')] -[2024-07-05 15:15:37,408][03976] Updated weights for policy 0, policy_version 64445 (0.0008) -[2024-07-05 15:15:39,103][03976] Updated weights for policy 0, policy_version 64455 (0.0007) -[2024-07-05 15:15:40,794][03976] Updated weights for policy 0, policy_version 64465 (0.0008) -[2024-07-05 15:15:41,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48207.8). Total num frames: 508108800. Throughput: 0: 12050.7. Samples: 14505476. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:15:41,168][03423] Avg episode reward: [(0, '52.056')] -[2024-07-05 15:15:42,475][03976] Updated weights for policy 0, policy_version 64475 (0.0007) -[2024-07-05 15:15:44,234][03976] Updated weights for policy 0, policy_version 64485 (0.0008) -[2024-07-05 15:15:45,918][03976] Updated weights for policy 0, policy_version 64495 (0.0008) -[2024-07-05 15:15:46,168][03423] Fps is (10 sec: 48331.8, 60 sec: 48196.2, 300 sec: 48180.0). Total num frames: 508346368. Throughput: 0: 12051.2. Samples: 14577568. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:15:46,169][03423] Avg episode reward: [(0, '53.947')] -[2024-07-05 15:15:47,642][03976] Updated weights for policy 0, policy_version 64505 (0.0008) -[2024-07-05 15:15:49,315][03976] Updated weights for policy 0, policy_version 64515 (0.0007) -[2024-07-05 15:15:51,038][03976] Updated weights for policy 0, policy_version 64525 (0.0009) -[2024-07-05 15:15:51,167][03423] Fps is (10 sec: 47513.7, 60 sec: 48059.8, 300 sec: 48180.1). Total num frames: 508583936. Throughput: 0: 12036.5. Samples: 14613416. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:15:51,168][03423] Avg episode reward: [(0, '55.426')] -[2024-07-05 15:15:52,737][03976] Updated weights for policy 0, policy_version 64535 (0.0008) -[2024-07-05 15:15:54,447][03976] Updated weights for policy 0, policy_version 64545 (0.0008) -[2024-07-05 15:15:56,103][03976] Updated weights for policy 0, policy_version 64555 (0.0009) -[2024-07-05 15:15:56,168][03423] Fps is (10 sec: 48333.6, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 508829696. Throughput: 0: 12040.5. Samples: 14685736. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:15:56,169][03423] Avg episode reward: [(0, '54.055')] -[2024-07-05 15:15:57,819][03976] Updated weights for policy 0, policy_version 64565 (0.0007) -[2024-07-05 15:15:59,473][03976] Updated weights for policy 0, policy_version 64575 (0.0010) -[2024-07-05 15:16:01,167][03976] Updated weights for policy 0, policy_version 64585 (0.0007) -[2024-07-05 15:16:01,168][03423] Fps is (10 sec: 49151.5, 60 sec: 48332.7, 300 sec: 48207.8). Total num frames: 509075456. Throughput: 0: 12048.9. Samples: 14758280. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:16:01,176][03423] Avg episode reward: [(0, '55.899')] -[2024-07-05 15:16:02,882][03976] Updated weights for policy 0, policy_version 64595 (0.0009) -[2024-07-05 15:16:04,587][03976] Updated weights for policy 0, policy_version 64605 (0.0007) -[2024-07-05 15:16:06,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.2, 300 sec: 48207.8). Total num frames: 509313024. Throughput: 0: 12046.9. Samples: 14794472. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:16:06,169][03423] Avg episode reward: [(0, '53.629')] -[2024-07-05 15:16:06,172][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000064614_509313024.pth... -[2024-07-05 15:16:06,243][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000063203_497754112.pth -[2024-07-05 15:16:06,309][03976] Updated weights for policy 0, policy_version 64615 (0.0008) -[2024-07-05 15:16:07,983][03976] Updated weights for policy 0, policy_version 64625 (0.0008) -[2024-07-05 15:16:09,668][03976] Updated weights for policy 0, policy_version 64635 (0.0010) -[2024-07-05 15:16:11,167][03423] Fps is (10 sec: 47514.1, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 509550592. Throughput: 0: 12040.3. Samples: 14866416. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:16:11,168][03423] Avg episode reward: [(0, '53.446')] -[2024-07-05 15:16:11,395][03976] Updated weights for policy 0, policy_version 64645 (0.0008) -[2024-07-05 15:16:13,082][03976] Updated weights for policy 0, policy_version 64655 (0.0008) -[2024-07-05 15:16:14,813][03976] Updated weights for policy 0, policy_version 64665 (0.0008) -[2024-07-05 15:16:16,167][03423] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48207.9). Total num frames: 509796352. Throughput: 0: 12043.4. Samples: 14938960. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:16:16,169][03423] Avg episode reward: [(0, '54.381')] -[2024-07-05 15:16:16,478][03976] Updated weights for policy 0, policy_version 64675 (0.0008) -[2024-07-05 15:16:18,192][03976] Updated weights for policy 0, policy_version 64685 (0.0008) -[2024-07-05 15:16:19,933][03976] Updated weights for policy 0, policy_version 64695 (0.0008) -[2024-07-05 15:16:21,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 510033920. Throughput: 0: 12047.2. Samples: 14975204. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:21,169][03423] Avg episode reward: [(0, '53.142')] -[2024-07-05 15:16:21,646][03976] Updated weights for policy 0, policy_version 64705 (0.0008) -[2024-07-05 15:16:23,298][03976] Updated weights for policy 0, policy_version 64715 (0.0007) -[2024-07-05 15:16:25,001][03976] Updated weights for policy 0, policy_version 64725 (0.0008) -[2024-07-05 15:16:26,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 510271488. Throughput: 0: 12046.9. Samples: 15047588. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:26,169][03423] Avg episode reward: [(0, '56.163')] -[2024-07-05 15:16:26,680][03976] Updated weights for policy 0, policy_version 64735 (0.0010) -[2024-07-05 15:16:28,411][03976] Updated weights for policy 0, policy_version 64745 (0.0007) -[2024-07-05 15:16:30,121][03976] Updated weights for policy 0, policy_version 64755 (0.0007) -[2024-07-05 15:16:31,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 510517248. Throughput: 0: 12043.9. Samples: 15119544. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:31,168][03423] Avg episode reward: [(0, '56.777')] -[2024-07-05 15:16:31,816][03976] Updated weights for policy 0, policy_version 64765 (0.0010) -[2024-07-05 15:16:33,520][03976] Updated weights for policy 0, policy_version 64775 (0.0008) -[2024-07-05 15:16:35,216][03976] Updated weights for policy 0, policy_version 64785 (0.0008) -[2024-07-05 15:16:36,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48196.1, 300 sec: 48180.1). Total num frames: 510754816. Throughput: 0: 12054.9. Samples: 15155888. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:36,169][03423] Avg episode reward: [(0, '56.172')] -[2024-07-05 15:16:36,902][03976] Updated weights for policy 0, policy_version 64795 (0.0007) -[2024-07-05 15:16:38,585][03976] Updated weights for policy 0, policy_version 64805 (0.0010) -[2024-07-05 15:16:40,255][03976] Updated weights for policy 0, policy_version 64815 (0.0008) -[2024-07-05 15:16:41,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48196.2, 300 sec: 48180.0). Total num frames: 511000576. Throughput: 0: 12056.0. Samples: 15228256. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:41,169][03423] Avg episode reward: [(0, '56.989')] -[2024-07-05 15:16:41,986][03976] Updated weights for policy 0, policy_version 64825 (0.0009) -[2024-07-05 15:16:43,718][03976] Updated weights for policy 0, policy_version 64835 (0.0007) -[2024-07-05 15:16:45,429][03976] Updated weights for policy 0, policy_version 64845 (0.0009) -[2024-07-05 15:16:46,167][03423] Fps is (10 sec: 48333.3, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 511238144. Throughput: 0: 12049.6. Samples: 15300512. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:46,168][03423] Avg episode reward: [(0, '54.426')] -[2024-07-05 15:16:47,131][03976] Updated weights for policy 0, policy_version 64855 (0.0008) -[2024-07-05 15:16:48,799][03976] Updated weights for policy 0, policy_version 64865 (0.0008) -[2024-07-05 15:16:50,500][03976] Updated weights for policy 0, policy_version 64875 (0.0011) -[2024-07-05 15:16:51,168][03423] Fps is (10 sec: 47513.4, 60 sec: 48196.2, 300 sec: 48152.3). Total num frames: 511475712. Throughput: 0: 12045.9. Samples: 15336536. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:51,169][03423] Avg episode reward: [(0, '54.223')] -[2024-07-05 15:16:52,223][03976] Updated weights for policy 0, policy_version 64885 (0.0008) -[2024-07-05 15:16:53,935][03976] Updated weights for policy 0, policy_version 64895 (0.0008) -[2024-07-05 15:16:55,608][03976] Updated weights for policy 0, policy_version 64905 (0.0008) -[2024-07-05 15:16:56,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 511721472. Throughput: 0: 12048.5. Samples: 15408600. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) -[2024-07-05 15:16:56,169][03423] Avg episode reward: [(0, '57.584')] -[2024-07-05 15:16:57,315][03976] Updated weights for policy 0, policy_version 64915 (0.0009) -[2024-07-05 15:16:58,997][03976] Updated weights for policy 0, policy_version 64925 (0.0008) -[2024-07-05 15:17:00,688][03976] Updated weights for policy 0, policy_version 64935 (0.0008) -[2024-07-05 15:17:01,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48059.7, 300 sec: 48180.0). Total num frames: 511959040. Throughput: 0: 12038.3. Samples: 15480684. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:01,168][03423] Avg episode reward: [(0, '56.877')] -[2024-07-05 15:17:02,435][03976] Updated weights for policy 0, policy_version 64945 (0.0010) -[2024-07-05 15:17:04,134][03976] Updated weights for policy 0, policy_version 64955 (0.0007) -[2024-07-05 15:17:05,857][03976] Updated weights for policy 0, policy_version 64965 (0.0008) -[2024-07-05 15:17:06,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 512196608. Throughput: 0: 12033.0. Samples: 15516688. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:06,169][03423] Avg episode reward: [(0, '53.323')] -[2024-07-05 15:17:07,537][03976] Updated weights for policy 0, policy_version 64975 (0.0008) -[2024-07-05 15:17:09,225][03976] Updated weights for policy 0, policy_version 64985 (0.0010) -[2024-07-05 15:17:10,930][03976] Updated weights for policy 0, policy_version 64995 (0.0008) -[2024-07-05 15:17:11,168][03423] Fps is (10 sec: 48333.3, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 512442368. Throughput: 0: 12040.9. Samples: 15589428. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:11,169][03423] Avg episode reward: [(0, '55.966')] -[2024-07-05 15:17:12,626][03976] Updated weights for policy 0, policy_version 65005 (0.0008) -[2024-07-05 15:17:14,322][03976] Updated weights for policy 0, policy_version 65015 (0.0008) -[2024-07-05 15:17:15,981][03976] Updated weights for policy 0, policy_version 65025 (0.0008) -[2024-07-05 15:17:16,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48059.7, 300 sec: 48152.3). Total num frames: 512679936. Throughput: 0: 12036.8. Samples: 15661200. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:16,168][03423] Avg episode reward: [(0, '55.903')] -[2024-07-05 15:17:17,702][03976] Updated weights for policy 0, policy_version 65035 (0.0010) -[2024-07-05 15:17:19,395][03976] Updated weights for policy 0, policy_version 65045 (0.0008) -[2024-07-05 15:17:21,118][03976] Updated weights for policy 0, policy_version 65055 (0.0007) -[2024-07-05 15:17:21,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 512925696. Throughput: 0: 12044.2. Samples: 15697876. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:21,169][03423] Avg episode reward: [(0, '55.084')] -[2024-07-05 15:17:22,824][03976] Updated weights for policy 0, policy_version 65065 (0.0008) -[2024-07-05 15:17:24,504][03976] Updated weights for policy 0, policy_version 65075 (0.0008) -[2024-07-05 15:17:26,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 513163264. Throughput: 0: 12039.3. Samples: 15770024. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:26,168][03423] Avg episode reward: [(0, '53.838')] -[2024-07-05 15:17:26,188][03976] Updated weights for policy 0, policy_version 65085 (0.0007) -[2024-07-05 15:17:27,902][03976] Updated weights for policy 0, policy_version 65095 (0.0007) -[2024-07-05 15:17:29,592][03976] Updated weights for policy 0, policy_version 65105 (0.0008) -[2024-07-05 15:17:31,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 513409024. Throughput: 0: 12046.7. Samples: 15842612. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:31,169][03423] Avg episode reward: [(0, '54.803')] -[2024-07-05 15:17:31,296][03976] Updated weights for policy 0, policy_version 65115 (0.0007) -[2024-07-05 15:17:32,964][03976] Updated weights for policy 0, policy_version 65125 (0.0008) -[2024-07-05 15:17:34,664][03976] Updated weights for policy 0, policy_version 65135 (0.0007) -[2024-07-05 15:17:36,168][03423] Fps is (10 sec: 48331.4, 60 sec: 48196.1, 300 sec: 48180.0). Total num frames: 513646592. Throughput: 0: 12048.6. Samples: 15878724. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:36,169][03423] Avg episode reward: [(0, '54.701')] -[2024-07-05 15:17:36,386][03976] Updated weights for policy 0, policy_version 65145 (0.0007) -[2024-07-05 15:17:38,084][03976] Updated weights for policy 0, policy_version 65155 (0.0007) -[2024-07-05 15:17:39,811][03976] Updated weights for policy 0, policy_version 65165 (0.0007) -[2024-07-05 15:17:41,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 513892352. Throughput: 0: 12052.6. Samples: 15950968. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:41,168][03423] Avg episode reward: [(0, '54.368')] -[2024-07-05 15:17:41,494][03976] Updated weights for policy 0, policy_version 65175 (0.0007) -[2024-07-05 15:17:43,178][03976] Updated weights for policy 0, policy_version 65185 (0.0013) -[2024-07-05 15:17:44,873][03976] Updated weights for policy 0, policy_version 65195 (0.0008) -[2024-07-05 15:17:46,168][03423] Fps is (10 sec: 48333.7, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 514129920. Throughput: 0: 12059.6. Samples: 16023364. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:46,169][03423] Avg episode reward: [(0, '56.338')] -[2024-07-05 15:17:46,581][03976] Updated weights for policy 0, policy_version 65205 (0.0008) -[2024-07-05 15:17:48,280][03976] Updated weights for policy 0, policy_version 65215 (0.0007) -[2024-07-05 15:17:49,993][03976] Updated weights for policy 0, policy_version 65225 (0.0012) -[2024-07-05 15:17:51,168][03423] Fps is (10 sec: 47512.9, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 514367488. Throughput: 0: 12064.3. Samples: 16059584. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:51,169][03423] Avg episode reward: [(0, '56.085')] -[2024-07-05 15:17:51,663][03976] Updated weights for policy 0, policy_version 65235 (0.0008) -[2024-07-05 15:17:53,340][03976] Updated weights for policy 0, policy_version 65245 (0.0008) -[2024-07-05 15:17:55,041][03976] Updated weights for policy 0, policy_version 65255 (0.0008) -[2024-07-05 15:17:56,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 514613248. Throughput: 0: 12058.9. Samples: 16132080. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:17:56,168][03423] Avg episode reward: [(0, '54.156')] -[2024-07-05 15:17:56,755][03976] Updated weights for policy 0, policy_version 65265 (0.0007) -[2024-07-05 15:17:58,486][03976] Updated weights for policy 0, policy_version 65275 (0.0008) -[2024-07-05 15:18:00,163][03976] Updated weights for policy 0, policy_version 65285 (0.0008) -[2024-07-05 15:18:01,168][03423] Fps is (10 sec: 48333.2, 60 sec: 48196.4, 300 sec: 48152.3). Total num frames: 514850816. Throughput: 0: 12070.7. Samples: 16204380. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:18:01,170][03423] Avg episode reward: [(0, '54.161')] -[2024-07-05 15:18:01,925][03976] Updated weights for policy 0, policy_version 65295 (0.0008) -[2024-07-05 15:18:03,585][03976] Updated weights for policy 0, policy_version 65305 (0.0007) -[2024-07-05 15:18:05,266][03976] Updated weights for policy 0, policy_version 65315 (0.0011) -[2024-07-05 15:18:06,168][03423] Fps is (10 sec: 48333.1, 60 sec: 48332.8, 300 sec: 48180.1). Total num frames: 515096576. Throughput: 0: 12062.8. Samples: 16240704. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:18:06,169][03423] Avg episode reward: [(0, '53.099')] -[2024-07-05 15:18:06,173][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000065320_515096576.pth... -[2024-07-05 15:18:06,242][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000063908_503529472.pth -[2024-07-05 15:18:07,000][03976] Updated weights for policy 0, policy_version 65325 (0.0008) -[2024-07-05 15:18:08,687][03976] Updated weights for policy 0, policy_version 65335 (0.0007) -[2024-07-05 15:18:10,385][03976] Updated weights for policy 0, policy_version 65345 (0.0009) -[2024-07-05 15:18:11,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 515334144. Throughput: 0: 12045.8. Samples: 16312084. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:18:11,169][03423] Avg episode reward: [(0, '55.695')] -[2024-07-05 15:18:12,097][03976] Updated weights for policy 0, policy_version 65355 (0.0010) -[2024-07-05 15:18:13,797][03976] Updated weights for policy 0, policy_version 65365 (0.0007) -[2024-07-05 15:18:15,511][03976] Updated weights for policy 0, policy_version 65375 (0.0008) -[2024-07-05 15:18:16,167][03423] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48152.3). Total num frames: 515571712. Throughput: 0: 12040.8. Samples: 16384448. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:18:16,168][03423] Avg episode reward: [(0, '54.472')] -[2024-07-05 15:18:17,222][03976] Updated weights for policy 0, policy_version 65385 (0.0009) -[2024-07-05 15:18:18,923][03976] Updated weights for policy 0, policy_version 65395 (0.0007) -[2024-07-05 15:18:20,656][03976] Updated weights for policy 0, policy_version 65405 (0.0010) -[2024-07-05 15:18:21,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 515817472. Throughput: 0: 12041.5. Samples: 16420588. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:21,169][03423] Avg episode reward: [(0, '55.227')] -[2024-07-05 15:18:22,401][03976] Updated weights for policy 0, policy_version 65415 (0.0008) -[2024-07-05 15:18:24,085][03976] Updated weights for policy 0, policy_version 65425 (0.0008) -[2024-07-05 15:18:25,778][03976] Updated weights for policy 0, policy_version 65435 (0.0008) -[2024-07-05 15:18:26,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.2, 300 sec: 48152.3). Total num frames: 516055040. Throughput: 0: 12033.6. Samples: 16492480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:26,169][03423] Avg episode reward: [(0, '53.248')] -[2024-07-05 15:18:27,455][03976] Updated weights for policy 0, policy_version 65445 (0.0008) -[2024-07-05 15:18:29,132][03976] Updated weights for policy 0, policy_version 65455 (0.0009) -[2024-07-05 15:18:30,818][03976] Updated weights for policy 0, policy_version 65465 (0.0010) -[2024-07-05 15:18:31,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.2, 300 sec: 48180.1). Total num frames: 516300800. Throughput: 0: 12032.6. Samples: 16564828. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:31,169][03423] Avg episode reward: [(0, '54.144')] -[2024-07-05 15:18:32,527][03976] Updated weights for policy 0, policy_version 65475 (0.0008) -[2024-07-05 15:18:34,203][03976] Updated weights for policy 0, policy_version 65485 (0.0008) -[2024-07-05 15:18:35,955][03976] Updated weights for policy 0, policy_version 65495 (0.0007) -[2024-07-05 15:18:36,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.4, 300 sec: 48180.1). Total num frames: 516538368. Throughput: 0: 12033.0. Samples: 16601068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:36,169][03423] Avg episode reward: [(0, '54.059')] -[2024-07-05 15:18:37,593][03976] Updated weights for policy 0, policy_version 65505 (0.0007) -[2024-07-05 15:18:39,328][03976] Updated weights for policy 0, policy_version 65515 (0.0007) -[2024-07-05 15:18:41,008][03976] Updated weights for policy 0, policy_version 65525 (0.0007) -[2024-07-05 15:18:41,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48059.7, 300 sec: 48180.1). Total num frames: 516775936. Throughput: 0: 12023.9. Samples: 16673156. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:41,168][03423] Avg episode reward: [(0, '55.517')] -[2024-07-05 15:18:42,701][03976] Updated weights for policy 0, policy_version 65535 (0.0009) -[2024-07-05 15:18:44,429][03976] Updated weights for policy 0, policy_version 65545 (0.0008) -[2024-07-05 15:18:46,133][03976] Updated weights for policy 0, policy_version 65555 (0.0008) -[2024-07-05 15:18:46,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 517021696. Throughput: 0: 12035.2. Samples: 16745964. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:46,169][03423] Avg episode reward: [(0, '54.841')] -[2024-07-05 15:18:47,829][03976] Updated weights for policy 0, policy_version 65565 (0.0007) -[2024-07-05 15:18:49,546][03976] Updated weights for policy 0, policy_version 65575 (0.0007) -[2024-07-05 15:18:51,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48196.3, 300 sec: 48180.1). Total num frames: 517259264. Throughput: 0: 12023.9. Samples: 16781780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:51,169][03423] Avg episode reward: [(0, '54.776')] -[2024-07-05 15:18:51,219][03976] Updated weights for policy 0, policy_version 65585 (0.0007) -[2024-07-05 15:18:52,947][03976] Updated weights for policy 0, policy_version 65595 (0.0008) -[2024-07-05 15:18:54,635][03976] Updated weights for policy 0, policy_version 65605 (0.0008) -[2024-07-05 15:18:56,168][03423] Fps is (10 sec: 47514.0, 60 sec: 48059.8, 300 sec: 48152.3). Total num frames: 517496832. Throughput: 0: 12040.7. Samples: 16853916. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:18:56,168][03423] Avg episode reward: [(0, '54.445')] -[2024-07-05 15:18:56,320][03976] Updated weights for policy 0, policy_version 65615 (0.0008) -[2024-07-05 15:18:57,962][03976] Updated weights for policy 0, policy_version 65625 (0.0007) -[2024-07-05 15:18:59,635][03976] Updated weights for policy 0, policy_version 65635 (0.0007) -[2024-07-05 15:19:01,168][03423] Fps is (10 sec: 49151.1, 60 sec: 48332.6, 300 sec: 48207.8). Total num frames: 517750784. Throughput: 0: 12069.8. Samples: 16927592. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:01,169][03423] Avg episode reward: [(0, '53.356')] -[2024-07-05 15:19:01,313][03976] Updated weights for policy 0, policy_version 65645 (0.0008) -[2024-07-05 15:19:03,001][03976] Updated weights for policy 0, policy_version 65655 (0.0008) -[2024-07-05 15:19:04,641][03976] Updated weights for policy 0, policy_version 65665 (0.0007) -[2024-07-05 15:19:06,168][03423] Fps is (10 sec: 49971.3, 60 sec: 48332.8, 300 sec: 48235.6). Total num frames: 517996544. Throughput: 0: 12093.6. Samples: 16964800. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:06,169][03423] Avg episode reward: [(0, '53.766')] -[2024-07-05 15:19:06,275][03976] Updated weights for policy 0, policy_version 65675 (0.0007) -[2024-07-05 15:19:07,898][03976] Updated weights for policy 0, policy_version 65685 (0.0007) -[2024-07-05 15:19:09,567][03976] Updated weights for policy 0, policy_version 65695 (0.0008) -[2024-07-05 15:19:11,168][03423] Fps is (10 sec: 49153.3, 60 sec: 48469.3, 300 sec: 48263.4). Total num frames: 518242304. Throughput: 0: 12150.7. Samples: 17039260. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:11,168][03423] Avg episode reward: [(0, '55.258')] -[2024-07-05 15:19:11,246][03976] Updated weights for policy 0, policy_version 65705 (0.0008) -[2024-07-05 15:19:12,888][03976] Updated weights for policy 0, policy_version 65715 (0.0009) -[2024-07-05 15:19:14,542][03976] Updated weights for policy 0, policy_version 65725 (0.0007) -[2024-07-05 15:19:16,168][03423] Fps is (10 sec: 49151.8, 60 sec: 48605.8, 300 sec: 48263.4). Total num frames: 518488064. Throughput: 0: 12188.8. Samples: 17113324. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:16,168][03423] Avg episode reward: [(0, '56.703')] -[2024-07-05 15:19:16,184][03976] Updated weights for policy 0, policy_version 65735 (0.0007) -[2024-07-05 15:19:17,835][03976] Updated weights for policy 0, policy_version 65745 (0.0007) -[2024-07-05 15:19:19,498][03976] Updated weights for policy 0, policy_version 65755 (0.0007) -[2024-07-05 15:19:21,158][03976] Updated weights for policy 0, policy_version 65765 (0.0008) -[2024-07-05 15:19:21,168][03423] Fps is (10 sec: 49970.5, 60 sec: 48742.3, 300 sec: 48291.2). Total num frames: 518742016. Throughput: 0: 12206.7. Samples: 17150368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:21,169][03423] Avg episode reward: [(0, '54.372')] -[2024-07-05 15:19:22,791][03976] Updated weights for policy 0, policy_version 65775 (0.0007) -[2024-07-05 15:19:24,463][03976] Updated weights for policy 0, policy_version 65785 (0.0008) -[2024-07-05 15:19:26,109][03976] Updated weights for policy 0, policy_version 65795 (0.0007) -[2024-07-05 15:19:26,168][03423] Fps is (10 sec: 49971.1, 60 sec: 48878.9, 300 sec: 48318.9). Total num frames: 518987776. Throughput: 0: 12250.4. Samples: 17224424. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:26,169][03423] Avg episode reward: [(0, '53.429')] -[2024-07-05 15:19:27,782][03976] Updated weights for policy 0, policy_version 65805 (0.0009) -[2024-07-05 15:19:29,468][03976] Updated weights for policy 0, policy_version 65815 (0.0007) -[2024-07-05 15:19:31,117][03976] Updated weights for policy 0, policy_version 65825 (0.0008) -[2024-07-05 15:19:31,168][03423] Fps is (10 sec: 49152.4, 60 sec: 48878.9, 300 sec: 48346.7). Total num frames: 519233536. Throughput: 0: 12281.1. Samples: 17298612. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:31,169][03423] Avg episode reward: [(0, '52.997')] -[2024-07-05 15:19:32,767][03976] Updated weights for policy 0, policy_version 65835 (0.0010) -[2024-07-05 15:19:34,395][03976] Updated weights for policy 0, policy_version 65845 (0.0009) -[2024-07-05 15:19:36,056][03976] Updated weights for policy 0, policy_version 65855 (0.0008) -[2024-07-05 15:19:36,168][03423] Fps is (10 sec: 49151.7, 60 sec: 49015.4, 300 sec: 48346.7). Total num frames: 519479296. Throughput: 0: 12313.4. Samples: 17335884. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:19:36,169][03423] Avg episode reward: [(0, '52.956')] -[2024-07-05 15:19:37,699][03976] Updated weights for policy 0, policy_version 65865 (0.0008) -[2024-07-05 15:19:39,362][03976] Updated weights for policy 0, policy_version 65875 (0.0007) -[2024-07-05 15:19:40,993][03976] Updated weights for policy 0, policy_version 65885 (0.0007) -[2024-07-05 15:19:41,167][03423] Fps is (10 sec: 49152.5, 60 sec: 49152.0, 300 sec: 48374.5). Total num frames: 519725056. Throughput: 0: 12357.4. Samples: 17409996. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:19:41,168][03423] Avg episode reward: [(0, '56.015')] -[2024-07-05 15:19:42,634][03976] Updated weights for policy 0, policy_version 65895 (0.0010) -[2024-07-05 15:19:44,334][03976] Updated weights for policy 0, policy_version 65905 (0.0007) -[2024-07-05 15:19:46,014][03976] Updated weights for policy 0, policy_version 65915 (0.0007) -[2024-07-05 15:19:46,167][03423] Fps is (10 sec: 49152.7, 60 sec: 49152.1, 300 sec: 48374.5). Total num frames: 519970816. Throughput: 0: 12376.3. Samples: 17484524. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:19:46,168][03423] Avg episode reward: [(0, '57.343')] -[2024-07-05 15:19:47,670][03976] Updated weights for policy 0, policy_version 65925 (0.0008) -[2024-07-05 15:19:49,315][03976] Updated weights for policy 0, policy_version 65935 (0.0008) -[2024-07-05 15:19:50,953][03976] Updated weights for policy 0, policy_version 65945 (0.0009) -[2024-07-05 15:19:51,168][03423] Fps is (10 sec: 49967.6, 60 sec: 49424.6, 300 sec: 48429.9). Total num frames: 520224768. Throughput: 0: 12366.8. Samples: 17521316. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:19:51,169][03423] Avg episode reward: [(0, '55.643')] -[2024-07-05 15:19:52,657][03976] Updated weights for policy 0, policy_version 65955 (0.0008) -[2024-07-05 15:19:54,291][03976] Updated weights for policy 0, policy_version 65965 (0.0009) -[2024-07-05 15:19:55,914][03976] Updated weights for policy 0, policy_version 65975 (0.0007) -[2024-07-05 15:19:56,167][03423] Fps is (10 sec: 49971.3, 60 sec: 49561.6, 300 sec: 48457.8). Total num frames: 520470528. Throughput: 0: 12369.1. Samples: 17595868. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:19:56,169][03423] Avg episode reward: [(0, '54.853')] -[2024-07-05 15:19:57,589][03976] Updated weights for policy 0, policy_version 65985 (0.0007) -[2024-07-05 15:19:59,241][03976] Updated weights for policy 0, policy_version 65995 (0.0007) -[2024-07-05 15:20:00,895][03976] Updated weights for policy 0, policy_version 66005 (0.0007) -[2024-07-05 15:20:01,168][03423] Fps is (10 sec: 49154.9, 60 sec: 49425.2, 300 sec: 48457.8). Total num frames: 520716288. Throughput: 0: 12371.8. Samples: 17670056. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:20:01,169][03423] Avg episode reward: [(0, '52.959')] -[2024-07-05 15:20:02,555][03976] Updated weights for policy 0, policy_version 66015 (0.0007) -[2024-07-05 15:20:04,182][03976] Updated weights for policy 0, policy_version 66025 (0.0008) -[2024-07-05 15:20:05,818][03976] Updated weights for policy 0, policy_version 66035 (0.0008) -[2024-07-05 15:20:06,168][03423] Fps is (10 sec: 49970.9, 60 sec: 49561.6, 300 sec: 48513.3). Total num frames: 520970240. Throughput: 0: 12370.8. Samples: 17707052. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:20:06,169][03423] Avg episode reward: [(0, '55.758')] -[2024-07-05 15:20:06,172][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000066037_520970240.pth... -[2024-07-05 15:20:06,234][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000064614_509313024.pth -[2024-07-05 15:20:07,491][03976] Updated weights for policy 0, policy_version 66045 (0.0010) -[2024-07-05 15:20:09,161][03976] Updated weights for policy 0, policy_version 66055 (0.0007) -[2024-07-05 15:20:10,844][03976] Updated weights for policy 0, policy_version 66065 (0.0007) -[2024-07-05 15:20:11,167][03423] Fps is (10 sec: 49971.7, 60 sec: 49561.6, 300 sec: 48513.3). Total num frames: 521216000. Throughput: 0: 12371.1. Samples: 17781124. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:20:11,168][03423] Avg episode reward: [(0, '57.140')] -[2024-07-05 15:20:12,527][03976] Updated weights for policy 0, policy_version 66075 (0.0007) -[2024-07-05 15:20:14,119][03976] Updated weights for policy 0, policy_version 66085 (0.0007) -[2024-07-05 15:20:15,769][03976] Updated weights for policy 0, policy_version 66095 (0.0008) -[2024-07-05 15:20:16,168][03423] Fps is (10 sec: 49152.2, 60 sec: 49561.6, 300 sec: 48541.1). Total num frames: 521461760. Throughput: 0: 12381.6. Samples: 17855784. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:20:16,169][03423] Avg episode reward: [(0, '55.583')] -[2024-07-05 15:20:17,405][03976] Updated weights for policy 0, policy_version 66105 (0.0008) -[2024-07-05 15:20:19,064][03976] Updated weights for policy 0, policy_version 66115 (0.0008) -[2024-07-05 15:20:20,763][03976] Updated weights for policy 0, policy_version 66125 (0.0007) -[2024-07-05 15:20:21,168][03423] Fps is (10 sec: 49151.9, 60 sec: 49425.2, 300 sec: 48541.1). Total num frames: 521707520. Throughput: 0: 12385.1. Samples: 17893212. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:21,168][03423] Avg episode reward: [(0, '54.655')] -[2024-07-05 15:20:22,379][03976] Updated weights for policy 0, policy_version 66135 (0.0008) -[2024-07-05 15:20:24,031][03976] Updated weights for policy 0, policy_version 66145 (0.0007) -[2024-07-05 15:20:25,693][03976] Updated weights for policy 0, policy_version 66155 (0.0007) -[2024-07-05 15:20:26,168][03423] Fps is (10 sec: 49152.0, 60 sec: 49425.1, 300 sec: 48568.8). Total num frames: 521953280. Throughput: 0: 12381.0. Samples: 17967140. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:26,169][03423] Avg episode reward: [(0, '55.066')] -[2024-07-05 15:20:27,409][03976] Updated weights for policy 0, policy_version 66165 (0.0008) -[2024-07-05 15:20:29,046][03976] Updated weights for policy 0, policy_version 66175 (0.0008) -[2024-07-05 15:20:30,689][03976] Updated weights for policy 0, policy_version 66185 (0.0008) -[2024-07-05 15:20:31,167][03423] Fps is (10 sec: 49152.1, 60 sec: 49425.1, 300 sec: 48596.6). Total num frames: 522199040. Throughput: 0: 12370.8. Samples: 18041208. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:31,169][03423] Avg episode reward: [(0, '53.915')] -[2024-07-05 15:20:32,330][03976] Updated weights for policy 0, policy_version 66195 (0.0008) -[2024-07-05 15:20:34,006][03976] Updated weights for policy 0, policy_version 66205 (0.0010) -[2024-07-05 15:20:35,662][03976] Updated weights for policy 0, policy_version 66215 (0.0009) -[2024-07-05 15:20:36,167][03423] Fps is (10 sec: 49971.4, 60 sec: 49561.7, 300 sec: 48624.4). Total num frames: 522452992. Throughput: 0: 12370.1. Samples: 18077960. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:36,169][03423] Avg episode reward: [(0, '55.431')] -[2024-07-05 15:20:37,293][03976] Updated weights for policy 0, policy_version 66225 (0.0008) -[2024-07-05 15:20:38,985][03976] Updated weights for policy 0, policy_version 66235 (0.0007) -[2024-07-05 15:20:40,635][03976] Updated weights for policy 0, policy_version 66245 (0.0008) -[2024-07-05 15:20:41,167][03423] Fps is (10 sec: 49971.3, 60 sec: 49561.6, 300 sec: 48652.2). Total num frames: 522698752. Throughput: 0: 12360.2. Samples: 18152076. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:41,169][03423] Avg episode reward: [(0, '54.580')] -[2024-07-05 15:20:42,298][03976] Updated weights for policy 0, policy_version 66255 (0.0007) -[2024-07-05 15:20:43,978][03976] Updated weights for policy 0, policy_version 66265 (0.0007) -[2024-07-05 15:20:45,639][03976] Updated weights for policy 0, policy_version 66275 (0.0007) -[2024-07-05 15:20:46,168][03423] Fps is (10 sec: 49151.7, 60 sec: 49561.6, 300 sec: 48679.9). Total num frames: 522944512. Throughput: 0: 12357.8. Samples: 18226156. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:46,168][03423] Avg episode reward: [(0, '54.137')] -[2024-07-05 15:20:47,315][03976] Updated weights for policy 0, policy_version 66285 (0.0008) -[2024-07-05 15:20:48,976][03976] Updated weights for policy 0, policy_version 66295 (0.0008) -[2024-07-05 15:20:50,643][03976] Updated weights for policy 0, policy_version 66305 (0.0007) -[2024-07-05 15:20:51,168][03423] Fps is (10 sec: 49151.8, 60 sec: 49425.6, 300 sec: 48679.9). Total num frames: 523190272. Throughput: 0: 12352.3. Samples: 18262904. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:51,169][03423] Avg episode reward: [(0, '53.688')] -[2024-07-05 15:20:52,313][03976] Updated weights for policy 0, policy_version 66315 (0.0008) -[2024-07-05 15:20:53,958][03976] Updated weights for policy 0, policy_version 66325 (0.0007) -[2024-07-05 15:20:55,576][03976] Updated weights for policy 0, policy_version 66335 (0.0008) -[2024-07-05 15:20:56,168][03423] Fps is (10 sec: 49151.8, 60 sec: 49425.0, 300 sec: 48679.9). Total num frames: 523436032. Throughput: 0: 12355.9. Samples: 18337140. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:20:56,169][03423] Avg episode reward: [(0, '55.801')] -[2024-07-05 15:20:57,202][03976] Updated weights for policy 0, policy_version 66345 (0.0007) -[2024-07-05 15:20:58,860][03976] Updated weights for policy 0, policy_version 66355 (0.0007) -[2024-07-05 15:21:00,523][03976] Updated weights for policy 0, policy_version 66365 (0.0009) -[2024-07-05 15:21:01,167][03423] Fps is (10 sec: 49152.5, 60 sec: 49425.2, 300 sec: 48707.7). Total num frames: 523681792. Throughput: 0: 12354.7. Samples: 18411744. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:01,169][03423] Avg episode reward: [(0, '53.766')] -[2024-07-05 15:21:02,181][03976] Updated weights for policy 0, policy_version 66375 (0.0009) -[2024-07-05 15:21:03,829][03976] Updated weights for policy 0, policy_version 66385 (0.0008) -[2024-07-05 15:21:05,472][03976] Updated weights for policy 0, policy_version 66395 (0.0008) -[2024-07-05 15:21:06,168][03423] Fps is (10 sec: 49971.5, 60 sec: 49425.1, 300 sec: 48763.2). Total num frames: 523935744. Throughput: 0: 12351.8. Samples: 18449044. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:06,168][03423] Avg episode reward: [(0, '55.144')] -[2024-07-05 15:21:07,178][03976] Updated weights for policy 0, policy_version 66405 (0.0009) -[2024-07-05 15:21:08,798][03976] Updated weights for policy 0, policy_version 66415 (0.0008) -[2024-07-05 15:21:10,420][03976] Updated weights for policy 0, policy_version 66425 (0.0007) -[2024-07-05 15:21:11,168][03423] Fps is (10 sec: 49970.6, 60 sec: 49425.0, 300 sec: 48763.2). Total num frames: 524181504. Throughput: 0: 12359.6. Samples: 18523324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:11,169][03423] Avg episode reward: [(0, '56.755')] -[2024-07-05 15:21:12,111][03976] Updated weights for policy 0, policy_version 66435 (0.0008) -[2024-07-05 15:21:13,777][03976] Updated weights for policy 0, policy_version 66445 (0.0007) -[2024-07-05 15:21:15,391][03976] Updated weights for policy 0, policy_version 66455 (0.0009) -[2024-07-05 15:21:16,168][03423] Fps is (10 sec: 49151.6, 60 sec: 49425.0, 300 sec: 48791.0). Total num frames: 524427264. Throughput: 0: 12369.8. Samples: 18597852. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:16,169][03423] Avg episode reward: [(0, '55.113')] -[2024-07-05 15:21:17,087][03976] Updated weights for policy 0, policy_version 66465 (0.0007) -[2024-07-05 15:21:18,730][03976] Updated weights for policy 0, policy_version 66475 (0.0007) -[2024-07-05 15:21:20,387][03976] Updated weights for policy 0, policy_version 66485 (0.0008) -[2024-07-05 15:21:21,167][03423] Fps is (10 sec: 49152.2, 60 sec: 49425.1, 300 sec: 48818.8). Total num frames: 524673024. Throughput: 0: 12376.3. Samples: 18634892. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:21,169][03423] Avg episode reward: [(0, '54.405')] -[2024-07-05 15:21:22,027][03976] Updated weights for policy 0, policy_version 66495 (0.0010) -[2024-07-05 15:21:23,722][03976] Updated weights for policy 0, policy_version 66505 (0.0007) -[2024-07-05 15:21:25,360][03976] Updated weights for policy 0, policy_version 66515 (0.0007) -[2024-07-05 15:21:26,168][03423] Fps is (10 sec: 49971.2, 60 sec: 49561.5, 300 sec: 48846.5). Total num frames: 524926976. Throughput: 0: 12378.0. Samples: 18709088. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:26,169][03423] Avg episode reward: [(0, '54.791')] -[2024-07-05 15:21:27,015][03976] Updated weights for policy 0, policy_version 66525 (0.0009) -[2024-07-05 15:21:28,645][03976] Updated weights for policy 0, policy_version 66535 (0.0008) -[2024-07-05 15:21:30,307][03976] Updated weights for policy 0, policy_version 66545 (0.0007) -[2024-07-05 15:21:31,167][03423] Fps is (10 sec: 49971.4, 60 sec: 49561.6, 300 sec: 48874.3). Total num frames: 525172736. Throughput: 0: 12376.6. Samples: 18783100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:31,168][03423] Avg episode reward: [(0, '55.830')] -[2024-07-05 15:21:31,955][03976] Updated weights for policy 0, policy_version 66555 (0.0008) -[2024-07-05 15:21:33,615][03976] Updated weights for policy 0, policy_version 66565 (0.0008) -[2024-07-05 15:21:35,260][03976] Updated weights for policy 0, policy_version 66575 (0.0007) -[2024-07-05 15:21:36,168][03423] Fps is (10 sec: 49152.5, 60 sec: 49425.0, 300 sec: 48874.3). Total num frames: 525418496. Throughput: 0: 12382.1. Samples: 18820100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:21:36,169][03423] Avg episode reward: [(0, '53.003')] -[2024-07-05 15:21:36,902][03976] Updated weights for policy 0, policy_version 66585 (0.0007) -[2024-07-05 15:21:38,579][03976] Updated weights for policy 0, policy_version 66595 (0.0007) -[2024-07-05 15:21:40,234][03976] Updated weights for policy 0, policy_version 66605 (0.0010) -[2024-07-05 15:21:41,168][03423] Fps is (10 sec: 49151.6, 60 sec: 49425.0, 300 sec: 48902.1). Total num frames: 525664256. Throughput: 0: 12389.6. Samples: 18894672. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:21:41,169][03423] Avg episode reward: [(0, '55.632')] -[2024-07-05 15:21:41,876][03976] Updated weights for policy 0, policy_version 66615 (0.0008) -[2024-07-05 15:21:43,561][03976] Updated weights for policy 0, policy_version 66625 (0.0007) -[2024-07-05 15:21:45,225][03976] Updated weights for policy 0, policy_version 66635 (0.0008) -[2024-07-05 15:21:46,167][03423] Fps is (10 sec: 49152.3, 60 sec: 49425.1, 300 sec: 48929.9). Total num frames: 525910016. Throughput: 0: 12372.4. Samples: 18968504. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:21:46,168][03423] Avg episode reward: [(0, '55.274')] -[2024-07-05 15:21:46,867][03976] Updated weights for policy 0, policy_version 66645 (0.0008) -[2024-07-05 15:21:48,512][03976] Updated weights for policy 0, policy_version 66655 (0.0008) -[2024-07-05 15:21:50,178][03976] Updated weights for policy 0, policy_version 66665 (0.0007) -[2024-07-05 15:21:51,168][03423] Fps is (10 sec: 49152.0, 60 sec: 49425.1, 300 sec: 48929.8). Total num frames: 526155776. Throughput: 0: 12373.2. Samples: 19005840. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:21:51,169][03423] Avg episode reward: [(0, '53.773')] -[2024-07-05 15:21:51,842][03976] Updated weights for policy 0, policy_version 66675 (0.0008) -[2024-07-05 15:21:53,485][03976] Updated weights for policy 0, policy_version 66685 (0.0007) -[2024-07-05 15:21:55,113][03976] Updated weights for policy 0, policy_version 66695 (0.0007) -[2024-07-05 15:21:56,167][03423] Fps is (10 sec: 49971.1, 60 sec: 49561.7, 300 sec: 48985.4). Total num frames: 526409728. Throughput: 0: 12369.8. Samples: 19079964. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:21:56,168][03423] Avg episode reward: [(0, '53.770')] -[2024-07-05 15:21:56,737][03976] Updated weights for policy 0, policy_version 66705 (0.0007) -[2024-07-05 15:21:58,429][03976] Updated weights for policy 0, policy_version 66715 (0.0009) -[2024-07-05 15:22:00,101][03976] Updated weights for policy 0, policy_version 66725 (0.0008) -[2024-07-05 15:22:01,168][03423] Fps is (10 sec: 49971.2, 60 sec: 49561.5, 300 sec: 49013.1). Total num frames: 526655488. Throughput: 0: 12368.5. Samples: 19154432. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:22:01,169][03423] Avg episode reward: [(0, '57.060')] -[2024-07-05 15:22:01,755][03976] Updated weights for policy 0, policy_version 66735 (0.0007) -[2024-07-05 15:22:03,400][03976] Updated weights for policy 0, policy_version 66745 (0.0007) -[2024-07-05 15:22:05,024][03976] Updated weights for policy 0, policy_version 66755 (0.0007) -[2024-07-05 15:22:06,168][03423] Fps is (10 sec: 49151.8, 60 sec: 49425.1, 300 sec: 49013.2). Total num frames: 526901248. Throughput: 0: 12377.0. Samples: 19191856. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:22:06,168][03423] Avg episode reward: [(0, '54.472')] -[2024-07-05 15:22:06,212][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000066762_526909440.pth... -[2024-07-05 15:22:06,278][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000065320_515096576.pth -[2024-07-05 15:22:06,683][03976] Updated weights for policy 0, policy_version 66765 (0.0008) -[2024-07-05 15:22:08,336][03976] Updated weights for policy 0, policy_version 66775 (0.0008) -[2024-07-05 15:22:09,994][03976] Updated weights for policy 0, policy_version 66785 (0.0007) -[2024-07-05 15:22:11,167][03423] Fps is (10 sec: 49971.6, 60 sec: 49561.7, 300 sec: 49068.7). Total num frames: 527155200. Throughput: 0: 12373.1. Samples: 19265876. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:22:11,168][03423] Avg episode reward: [(0, '55.776')] -[2024-07-05 15:22:11,655][03976] Updated weights for policy 0, policy_version 66795 (0.0010) -[2024-07-05 15:22:13,362][03976] Updated weights for policy 0, policy_version 66805 (0.0008) -[2024-07-05 15:22:15,014][03976] Updated weights for policy 0, policy_version 66815 (0.0010) -[2024-07-05 15:22:16,168][03423] Fps is (10 sec: 49971.0, 60 sec: 49561.7, 300 sec: 49068.7). Total num frames: 527400960. Throughput: 0: 12377.5. Samples: 19340088. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:22:16,169][03423] Avg episode reward: [(0, '55.700')] -[2024-07-05 15:22:16,653][03976] Updated weights for policy 0, policy_version 66825 (0.0008) -[2024-07-05 15:22:18,274][03976] Updated weights for policy 0, policy_version 66835 (0.0008) -[2024-07-05 15:22:19,948][03976] Updated weights for policy 0, policy_version 66845 (0.0008) -[2024-07-05 15:22:21,168][03423] Fps is (10 sec: 49151.6, 60 sec: 49561.6, 300 sec: 49096.5). Total num frames: 527646720. Throughput: 0: 12383.5. Samples: 19377356. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:21,169][03423] Avg episode reward: [(0, '56.970')] -[2024-07-05 15:22:21,575][03976] Updated weights for policy 0, policy_version 66855 (0.0007) -[2024-07-05 15:22:23,223][03976] Updated weights for policy 0, policy_version 66865 (0.0008) -[2024-07-05 15:22:24,899][03976] Updated weights for policy 0, policy_version 66875 (0.0007) -[2024-07-05 15:22:26,168][03423] Fps is (10 sec: 49151.5, 60 sec: 49425.0, 300 sec: 49096.4). Total num frames: 527892480. Throughput: 0: 12380.1. Samples: 19451780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:26,169][03423] Avg episode reward: [(0, '54.279')] -[2024-07-05 15:22:26,549][03976] Updated weights for policy 0, policy_version 66885 (0.0007) -[2024-07-05 15:22:28,224][03976] Updated weights for policy 0, policy_version 66895 (0.0008) -[2024-07-05 15:22:29,853][03976] Updated weights for policy 0, policy_version 66905 (0.0008) -[2024-07-05 15:22:31,168][03423] Fps is (10 sec: 49152.0, 60 sec: 49425.0, 300 sec: 49124.3). Total num frames: 528138240. Throughput: 0: 12390.0. Samples: 19526056. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:31,168][03423] Avg episode reward: [(0, '56.708')] -[2024-07-05 15:22:31,528][03976] Updated weights for policy 0, policy_version 66915 (0.0012) -[2024-07-05 15:22:33,157][03976] Updated weights for policy 0, policy_version 66925 (0.0007) -[2024-07-05 15:22:34,840][03976] Updated weights for policy 0, policy_version 66935 (0.0009) -[2024-07-05 15:22:36,168][03423] Fps is (10 sec: 49152.5, 60 sec: 49425.0, 300 sec: 49124.2). Total num frames: 528384000. Throughput: 0: 12383.8. Samples: 19563112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:36,169][03423] Avg episode reward: [(0, '56.961')] -[2024-07-05 15:22:36,525][03976] Updated weights for policy 0, policy_version 66945 (0.0008) -[2024-07-05 15:22:38,183][03976] Updated weights for policy 0, policy_version 66955 (0.0008) -[2024-07-05 15:22:39,815][03976] Updated weights for policy 0, policy_version 66965 (0.0008) -[2024-07-05 15:22:41,168][03423] Fps is (10 sec: 49971.3, 60 sec: 49561.6, 300 sec: 49179.8). Total num frames: 528637952. Throughput: 0: 12379.5. Samples: 19637040. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:41,168][03423] Avg episode reward: [(0, '56.673')] -[2024-07-05 15:22:41,481][03976] Updated weights for policy 0, policy_version 66975 (0.0007) -[2024-07-05 15:22:43,132][03976] Updated weights for policy 0, policy_version 66985 (0.0007) -[2024-07-05 15:22:44,783][03976] Updated weights for policy 0, policy_version 66995 (0.0010) -[2024-07-05 15:22:46,168][03423] Fps is (10 sec: 49971.2, 60 sec: 49561.5, 300 sec: 49207.5). Total num frames: 528883712. Throughput: 0: 12365.6. Samples: 19710884. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:46,168][03423] Avg episode reward: [(0, '54.285')] -[2024-07-05 15:22:46,451][03976] Updated weights for policy 0, policy_version 67005 (0.0007) -[2024-07-05 15:22:48,182][03976] Updated weights for policy 0, policy_version 67015 (0.0008) -[2024-07-05 15:22:49,828][03976] Updated weights for policy 0, policy_version 67025 (0.0008) -[2024-07-05 15:22:51,168][03423] Fps is (10 sec: 49151.6, 60 sec: 49561.6, 300 sec: 49207.5). Total num frames: 529129472. Throughput: 0: 12344.5. Samples: 19747360. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:51,169][03423] Avg episode reward: [(0, '54.995')] -[2024-07-05 15:22:51,502][03976] Updated weights for policy 0, policy_version 67035 (0.0008) -[2024-07-05 15:22:53,178][03976] Updated weights for policy 0, policy_version 67045 (0.0008) -[2024-07-05 15:22:54,773][03976] Updated weights for policy 0, policy_version 67055 (0.0007) -[2024-07-05 15:22:56,168][03423] Fps is (10 sec: 49151.8, 60 sec: 49425.0, 300 sec: 49235.3). Total num frames: 529375232. Throughput: 0: 12341.2. Samples: 19821232. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:22:56,169][03423] Avg episode reward: [(0, '55.664')] -[2024-07-05 15:22:56,525][03976] Updated weights for policy 0, policy_version 67065 (0.0007) -[2024-07-05 15:22:58,258][03976] Updated weights for policy 0, policy_version 67075 (0.0008) -[2024-07-05 15:22:59,965][03976] Updated weights for policy 0, policy_version 67085 (0.0007) -[2024-07-05 15:23:01,168][03423] Fps is (10 sec: 47513.9, 60 sec: 49152.0, 300 sec: 49179.8). Total num frames: 529604608. Throughput: 0: 12279.7. Samples: 19892676. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:01,168][03423] Avg episode reward: [(0, '55.793')] -[2024-07-05 15:23:01,693][03976] Updated weights for policy 0, policy_version 67095 (0.0008) -[2024-07-05 15:23:03,410][03976] Updated weights for policy 0, policy_version 67105 (0.0009) -[2024-07-05 15:23:05,136][03976] Updated weights for policy 0, policy_version 67115 (0.0008) -[2024-07-05 15:23:06,168][03423] Fps is (10 sec: 47513.5, 60 sec: 49151.9, 300 sec: 49207.5). Total num frames: 529850368. Throughput: 0: 12258.9. Samples: 19929008. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:06,169][03423] Avg episode reward: [(0, '52.719')] -[2024-07-05 15:23:06,854][03976] Updated weights for policy 0, policy_version 67125 (0.0009) -[2024-07-05 15:23:08,601][03976] Updated weights for policy 0, policy_version 67135 (0.0008) -[2024-07-05 15:23:10,309][03976] Updated weights for policy 0, policy_version 67145 (0.0008) -[2024-07-05 15:23:11,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48878.9, 300 sec: 49207.5). Total num frames: 530087936. Throughput: 0: 12177.7. Samples: 19999776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:11,168][03423] Avg episode reward: [(0, '54.391')] -[2024-07-05 15:23:12,019][03976] Updated weights for policy 0, policy_version 67155 (0.0009) -[2024-07-05 15:23:13,761][03976] Updated weights for policy 0, policy_version 67165 (0.0010) -[2024-07-05 15:23:15,468][03976] Updated weights for policy 0, policy_version 67175 (0.0007) -[2024-07-05 15:23:16,168][03423] Fps is (10 sec: 47514.0, 60 sec: 48742.4, 300 sec: 49179.8). Total num frames: 530325504. Throughput: 0: 12118.8. Samples: 20071400. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:16,169][03423] Avg episode reward: [(0, '54.320')] -[2024-07-05 15:23:17,185][03976] Updated weights for policy 0, policy_version 67185 (0.0010) -[2024-07-05 15:23:18,869][03976] Updated weights for policy 0, policy_version 67195 (0.0008) -[2024-07-05 15:23:20,592][03976] Updated weights for policy 0, policy_version 67205 (0.0008) -[2024-07-05 15:23:21,168][03423] Fps is (10 sec: 47513.2, 60 sec: 48605.9, 300 sec: 49179.8). Total num frames: 530563072. Throughput: 0: 12096.5. Samples: 20107456. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:21,169][03423] Avg episode reward: [(0, '52.604')] -[2024-07-05 15:23:22,288][03976] Updated weights for policy 0, policy_version 67215 (0.0007) -[2024-07-05 15:23:24,021][03976] Updated weights for policy 0, policy_version 67225 (0.0008) -[2024-07-05 15:23:25,728][03976] Updated weights for policy 0, policy_version 67235 (0.0007) -[2024-07-05 15:23:26,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48469.5, 300 sec: 49152.0). Total num frames: 530800640. Throughput: 0: 12043.4. Samples: 20178992. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:26,168][03423] Avg episode reward: [(0, '53.274')] -[2024-07-05 15:23:27,429][03976] Updated weights for policy 0, policy_version 67245 (0.0008) -[2024-07-05 15:23:29,185][03976] Updated weights for policy 0, policy_version 67255 (0.0008) -[2024-07-05 15:23:30,916][03976] Updated weights for policy 0, policy_version 67265 (0.0007) -[2024-07-05 15:23:31,167][03423] Fps is (10 sec: 47513.8, 60 sec: 48332.8, 300 sec: 49152.0). Total num frames: 531038208. Throughput: 0: 11993.6. Samples: 20250596. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:31,168][03423] Avg episode reward: [(0, '54.034')] -[2024-07-05 15:23:32,612][03976] Updated weights for policy 0, policy_version 67275 (0.0008) -[2024-07-05 15:23:34,308][03976] Updated weights for policy 0, policy_version 67285 (0.0008) -[2024-07-05 15:23:36,009][03976] Updated weights for policy 0, policy_version 67295 (0.0007) -[2024-07-05 15:23:36,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 49179.8). Total num frames: 531283968. Throughput: 0: 11991.8. Samples: 20286992. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:36,168][03423] Avg episode reward: [(0, '54.614')] -[2024-07-05 15:23:37,720][03976] Updated weights for policy 0, policy_version 67305 (0.0009) -[2024-07-05 15:23:39,407][03976] Updated weights for policy 0, policy_version 67315 (0.0008) -[2024-07-05 15:23:41,128][03976] Updated weights for policy 0, policy_version 67325 (0.0011) -[2024-07-05 15:23:41,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48059.7, 300 sec: 49152.0). Total num frames: 531521536. Throughput: 0: 11946.0. Samples: 20358800. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:23:41,169][03423] Avg episode reward: [(0, '54.001')] -[2024-07-05 15:23:42,886][03976] Updated weights for policy 0, policy_version 67335 (0.0007) -[2024-07-05 15:23:44,678][03976] Updated weights for policy 0, policy_version 67345 (0.0008) -[2024-07-05 15:23:46,167][03423] Fps is (10 sec: 46694.5, 60 sec: 47786.7, 300 sec: 49124.2). Total num frames: 531750912. Throughput: 0: 11913.8. Samples: 20428796. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:23:46,168][03423] Avg episode reward: [(0, '55.031')] -[2024-07-05 15:23:46,417][03976] Updated weights for policy 0, policy_version 67355 (0.0007) -[2024-07-05 15:23:48,116][03976] Updated weights for policy 0, policy_version 67365 (0.0009) -[2024-07-05 15:23:49,909][03976] Updated weights for policy 0, policy_version 67375 (0.0010) -[2024-07-05 15:23:51,167][03423] Fps is (10 sec: 46694.8, 60 sec: 47650.2, 300 sec: 49124.2). Total num frames: 531988480. Throughput: 0: 11903.9. Samples: 20464680. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:23:51,168][03423] Avg episode reward: [(0, '53.823')] -[2024-07-05 15:23:51,597][03976] Updated weights for policy 0, policy_version 67385 (0.0009) -[2024-07-05 15:23:53,325][03976] Updated weights for policy 0, policy_version 67395 (0.0008) -[2024-07-05 15:23:55,030][03976] Updated weights for policy 0, policy_version 67405 (0.0008) -[2024-07-05 15:23:56,168][03423] Fps is (10 sec: 47513.3, 60 sec: 47513.6, 300 sec: 49068.7). Total num frames: 532226048. Throughput: 0: 11913.7. Samples: 20535892. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:23:56,168][03423] Avg episode reward: [(0, '53.954')] -[2024-07-05 15:23:56,710][03976] Updated weights for policy 0, policy_version 67415 (0.0013) -[2024-07-05 15:23:58,434][03976] Updated weights for policy 0, policy_version 67425 (0.0010) -[2024-07-05 15:24:00,163][03976] Updated weights for policy 0, policy_version 67435 (0.0009) -[2024-07-05 15:24:01,168][03423] Fps is (10 sec: 48332.5, 60 sec: 47786.7, 300 sec: 49068.7). Total num frames: 532471808. Throughput: 0: 11916.4. Samples: 20607636. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:24:01,169][03423] Avg episode reward: [(0, '54.498')] -[2024-07-05 15:24:01,841][03976] Updated weights for policy 0, policy_version 67445 (0.0008) -[2024-07-05 15:24:03,591][03976] Updated weights for policy 0, policy_version 67455 (0.0008) -[2024-07-05 15:24:05,321][03976] Updated weights for policy 0, policy_version 67465 (0.0007) -[2024-07-05 15:24:06,168][03423] Fps is (10 sec: 48333.0, 60 sec: 47650.2, 300 sec: 49040.9). Total num frames: 532709376. Throughput: 0: 11914.8. Samples: 20643620. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:24:06,169][03423] Avg episode reward: [(0, '55.733')] -[2024-07-05 15:24:06,173][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000067470_532709376.pth... -[2024-07-05 15:24:06,241][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000066037_520970240.pth -[2024-07-05 15:24:07,018][03976] Updated weights for policy 0, policy_version 67475 (0.0008) -[2024-07-05 15:24:08,747][03976] Updated weights for policy 0, policy_version 67485 (0.0008) -[2024-07-05 15:24:10,477][03976] Updated weights for policy 0, policy_version 67495 (0.0008) -[2024-07-05 15:24:11,168][03423] Fps is (10 sec: 47513.7, 60 sec: 47650.1, 300 sec: 49013.2). Total num frames: 532946944. Throughput: 0: 11915.4. Samples: 20715184. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:24:11,169][03423] Avg episode reward: [(0, '54.761')] -[2024-07-05 15:24:12,138][03976] Updated weights for policy 0, policy_version 67505 (0.0007) -[2024-07-05 15:24:13,932][03976] Updated weights for policy 0, policy_version 67515 (0.0011) -[2024-07-05 15:24:15,721][03976] Updated weights for policy 0, policy_version 67525 (0.0009) -[2024-07-05 15:24:16,168][03423] Fps is (10 sec: 46694.1, 60 sec: 47513.6, 300 sec: 48929.9). Total num frames: 533176320. Throughput: 0: 11881.6. Samples: 20785268. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:24:16,168][03423] Avg episode reward: [(0, '55.234')] -[2024-07-05 15:24:17,463][03976] Updated weights for policy 0, policy_version 67535 (0.0011) -[2024-07-05 15:24:19,219][03976] Updated weights for policy 0, policy_version 67545 (0.0010) -[2024-07-05 15:24:20,917][03976] Updated weights for policy 0, policy_version 67555 (0.0008) -[2024-07-05 15:24:21,168][03423] Fps is (10 sec: 46694.5, 60 sec: 47513.6, 300 sec: 48902.1). Total num frames: 533413888. Throughput: 0: 11862.5. Samples: 20820804. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:24:21,169][03423] Avg episode reward: [(0, '55.338')] -[2024-07-05 15:24:22,540][03976] Updated weights for policy 0, policy_version 67565 (0.0007) -[2024-07-05 15:24:24,210][03976] Updated weights for policy 0, policy_version 67575 (0.0008) -[2024-07-05 15:24:25,883][03976] Updated weights for policy 0, policy_version 67585 (0.0009) -[2024-07-05 15:24:26,168][03423] Fps is (10 sec: 48332.8, 60 sec: 47650.1, 300 sec: 48902.1). Total num frames: 533659648. Throughput: 0: 11885.0. Samples: 20893624. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:26,169][03423] Avg episode reward: [(0, '56.732')] -[2024-07-05 15:24:27,510][03976] Updated weights for policy 0, policy_version 67595 (0.0007) -[2024-07-05 15:24:29,187][03976] Updated weights for policy 0, policy_version 67605 (0.0008) -[2024-07-05 15:24:30,858][03976] Updated weights for policy 0, policy_version 67615 (0.0008) -[2024-07-05 15:24:31,167][03423] Fps is (10 sec: 49152.3, 60 sec: 47786.7, 300 sec: 48902.1). Total num frames: 533905408. Throughput: 0: 11972.5. Samples: 20967556. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:31,168][03423] Avg episode reward: [(0, '56.516')] -[2024-07-05 15:24:32,548][03976] Updated weights for policy 0, policy_version 67625 (0.0007) -[2024-07-05 15:24:34,195][03976] Updated weights for policy 0, policy_version 67635 (0.0009) -[2024-07-05 15:24:35,875][03976] Updated weights for policy 0, policy_version 67645 (0.0008) -[2024-07-05 15:24:36,168][03423] Fps is (10 sec: 49152.3, 60 sec: 47786.7, 300 sec: 48902.1). Total num frames: 534151168. Throughput: 0: 11997.6. Samples: 21004572. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:36,168][03423] Avg episode reward: [(0, '55.632')] -[2024-07-05 15:24:37,520][03976] Updated weights for policy 0, policy_version 67655 (0.0009) -[2024-07-05 15:24:39,219][03976] Updated weights for policy 0, policy_version 67665 (0.0009) -[2024-07-05 15:24:40,929][03976] Updated weights for policy 0, policy_version 67675 (0.0007) -[2024-07-05 15:24:41,168][03423] Fps is (10 sec: 49151.7, 60 sec: 47923.2, 300 sec: 48902.1). Total num frames: 534396928. Throughput: 0: 12047.0. Samples: 21078008. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:41,169][03423] Avg episode reward: [(0, '54.661')] -[2024-07-05 15:24:42,626][03976] Updated weights for policy 0, policy_version 67685 (0.0007) -[2024-07-05 15:24:44,375][03976] Updated weights for policy 0, policy_version 67695 (0.0010) -[2024-07-05 15:24:46,078][03976] Updated weights for policy 0, policy_version 67705 (0.0009) -[2024-07-05 15:24:46,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48059.7, 300 sec: 48846.6). Total num frames: 534634496. Throughput: 0: 12038.5. Samples: 21149368. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:46,169][03423] Avg episode reward: [(0, '55.673')] -[2024-07-05 15:24:47,764][03976] Updated weights for policy 0, policy_version 67715 (0.0007) -[2024-07-05 15:24:49,561][03976] Updated weights for policy 0, policy_version 67725 (0.0010) -[2024-07-05 15:24:51,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48059.7, 300 sec: 48818.8). Total num frames: 534872064. Throughput: 0: 12037.0. Samples: 21185284. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:51,169][03423] Avg episode reward: [(0, '54.066')] -[2024-07-05 15:24:51,307][03976] Updated weights for policy 0, policy_version 67735 (0.0008) -[2024-07-05 15:24:53,012][03976] Updated weights for policy 0, policy_version 67745 (0.0011) -[2024-07-05 15:24:54,746][03976] Updated weights for policy 0, policy_version 67755 (0.0008) -[2024-07-05 15:24:56,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48059.8, 300 sec: 48791.0). Total num frames: 535109632. Throughput: 0: 12023.1. Samples: 21256224. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:24:56,168][03423] Avg episode reward: [(0, '54.368')] -[2024-07-05 15:24:56,421][03976] Updated weights for policy 0, policy_version 67765 (0.0008) -[2024-07-05 15:24:58,157][03976] Updated weights for policy 0, policy_version 67775 (0.0008) -[2024-07-05 15:24:59,871][03976] Updated weights for policy 0, policy_version 67785 (0.0012) -[2024-07-05 15:25:01,168][03423] Fps is (10 sec: 47513.6, 60 sec: 47923.2, 300 sec: 48735.5). Total num frames: 535347200. Throughput: 0: 12059.7. Samples: 21327956. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:25:01,169][03423] Avg episode reward: [(0, '55.496')] -[2024-07-05 15:25:01,567][03976] Updated weights for policy 0, policy_version 67795 (0.0008) -[2024-07-05 15:25:03,291][03976] Updated weights for policy 0, policy_version 67805 (0.0010) -[2024-07-05 15:25:04,990][03976] Updated weights for policy 0, policy_version 67815 (0.0010) -[2024-07-05 15:25:06,168][03423] Fps is (10 sec: 47513.3, 60 sec: 47923.1, 300 sec: 48707.7). Total num frames: 535584768. Throughput: 0: 12066.0. Samples: 21363776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:06,169][03423] Avg episode reward: [(0, '55.863')] -[2024-07-05 15:25:06,674][03976] Updated weights for policy 0, policy_version 67825 (0.0007) -[2024-07-05 15:25:08,372][03976] Updated weights for policy 0, policy_version 67835 (0.0011) -[2024-07-05 15:25:10,094][03976] Updated weights for policy 0, policy_version 67845 (0.0008) -[2024-07-05 15:25:11,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48059.8, 300 sec: 48707.7). Total num frames: 535830528. Throughput: 0: 12051.4. Samples: 21435936. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:11,169][03423] Avg episode reward: [(0, '56.822')] -[2024-07-05 15:25:11,854][03976] Updated weights for policy 0, policy_version 67855 (0.0010) -[2024-07-05 15:25:13,563][03976] Updated weights for policy 0, policy_version 67865 (0.0009) -[2024-07-05 15:25:15,275][03976] Updated weights for policy 0, policy_version 67875 (0.0007) -[2024-07-05 15:25:16,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48679.9). Total num frames: 536068096. Throughput: 0: 12000.9. Samples: 21507596. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:16,168][03423] Avg episode reward: [(0, '54.833')] -[2024-07-05 15:25:17,005][03976] Updated weights for policy 0, policy_version 67885 (0.0007) -[2024-07-05 15:25:18,708][03976] Updated weights for policy 0, policy_version 67895 (0.0008) -[2024-07-05 15:25:20,418][03976] Updated weights for policy 0, policy_version 67905 (0.0008) -[2024-07-05 15:25:21,168][03423] Fps is (10 sec: 47513.2, 60 sec: 48196.2, 300 sec: 48652.1). Total num frames: 536305664. Throughput: 0: 11974.6. Samples: 21543428. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:21,169][03423] Avg episode reward: [(0, '56.318')] -[2024-07-05 15:25:22,092][03976] Updated weights for policy 0, policy_version 67915 (0.0008) -[2024-07-05 15:25:23,791][03976] Updated weights for policy 0, policy_version 67925 (0.0007) -[2024-07-05 15:25:25,519][03976] Updated weights for policy 0, policy_version 67935 (0.0008) -[2024-07-05 15:25:26,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48059.8, 300 sec: 48624.4). Total num frames: 536543232. Throughput: 0: 11938.0. Samples: 21615220. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:26,168][03423] Avg episode reward: [(0, '54.006')] -[2024-07-05 15:25:27,220][03976] Updated weights for policy 0, policy_version 67945 (0.0007) -[2024-07-05 15:25:28,947][03976] Updated weights for policy 0, policy_version 67955 (0.0008) -[2024-07-05 15:25:30,678][03976] Updated weights for policy 0, policy_version 67965 (0.0007) -[2024-07-05 15:25:31,167][03423] Fps is (10 sec: 47514.2, 60 sec: 47923.2, 300 sec: 48568.9). Total num frames: 536780800. Throughput: 0: 11944.2. Samples: 21686856. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:31,168][03423] Avg episode reward: [(0, '56.192')] -[2024-07-05 15:25:32,367][03976] Updated weights for policy 0, policy_version 67975 (0.0008) -[2024-07-05 15:25:34,078][03976] Updated weights for policy 0, policy_version 67985 (0.0008) -[2024-07-05 15:25:35,780][03976] Updated weights for policy 0, policy_version 67995 (0.0007) -[2024-07-05 15:25:36,168][03423] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 48568.8). Total num frames: 537026560. Throughput: 0: 11951.3. Samples: 21723092. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:36,169][03423] Avg episode reward: [(0, '55.047')] -[2024-07-05 15:25:37,465][03976] Updated weights for policy 0, policy_version 68005 (0.0008) -[2024-07-05 15:25:39,181][03976] Updated weights for policy 0, policy_version 68015 (0.0008) -[2024-07-05 15:25:40,913][03976] Updated weights for policy 0, policy_version 68025 (0.0008) -[2024-07-05 15:25:41,167][03423] Fps is (10 sec: 48332.6, 60 sec: 47786.7, 300 sec: 48541.1). Total num frames: 537264128. Throughput: 0: 11976.9. Samples: 21795184. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:25:41,168][03423] Avg episode reward: [(0, '55.353')] -[2024-07-05 15:25:42,616][03976] Updated weights for policy 0, policy_version 68035 (0.0008) -[2024-07-05 15:25:44,302][03976] Updated weights for policy 0, policy_version 68045 (0.0010) -[2024-07-05 15:25:46,038][03976] Updated weights for policy 0, policy_version 68055 (0.0012) -[2024-07-05 15:25:46,167][03423] Fps is (10 sec: 47514.0, 60 sec: 47786.7, 300 sec: 48513.3). Total num frames: 537501696. Throughput: 0: 11974.1. Samples: 21866792. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:25:46,168][03423] Avg episode reward: [(0, '56.248')] -[2024-07-05 15:25:47,717][03976] Updated weights for policy 0, policy_version 68065 (0.0008) -[2024-07-05 15:25:49,412][03976] Updated weights for policy 0, policy_version 68075 (0.0007) -[2024-07-05 15:25:51,137][03976] Updated weights for policy 0, policy_version 68085 (0.0011) -[2024-07-05 15:25:51,168][03423] Fps is (10 sec: 48332.5, 60 sec: 47923.2, 300 sec: 48513.3). Total num frames: 537747456. Throughput: 0: 11983.8. Samples: 21903048. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:25:51,168][03423] Avg episode reward: [(0, '54.817')] -[2024-07-05 15:25:52,861][03976] Updated weights for policy 0, policy_version 68095 (0.0012) -[2024-07-05 15:25:54,554][03976] Updated weights for policy 0, policy_version 68105 (0.0008) -[2024-07-05 15:25:56,167][03423] Fps is (10 sec: 48332.7, 60 sec: 47923.2, 300 sec: 48485.5). Total num frames: 537985024. Throughput: 0: 11982.9. Samples: 21975168. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:25:56,169][03423] Avg episode reward: [(0, '55.256')] -[2024-07-05 15:25:56,276][03976] Updated weights for policy 0, policy_version 68115 (0.0008) -[2024-07-05 15:25:57,997][03976] Updated weights for policy 0, policy_version 68125 (0.0008) -[2024-07-05 15:25:59,668][03976] Updated weights for policy 0, policy_version 68135 (0.0008) -[2024-07-05 15:26:01,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47923.1, 300 sec: 48430.0). Total num frames: 538222592. Throughput: 0: 11989.7. Samples: 22047132. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:26:01,169][03423] Avg episode reward: [(0, '52.506')] -[2024-07-05 15:26:01,380][03976] Updated weights for policy 0, policy_version 68145 (0.0008) -[2024-07-05 15:26:03,084][03976] Updated weights for policy 0, policy_version 68155 (0.0007) -[2024-07-05 15:26:04,906][03976] Updated weights for policy 0, policy_version 68165 (0.0008) -[2024-07-05 15:26:06,167][03423] Fps is (10 sec: 47513.6, 60 sec: 47923.3, 300 sec: 48402.2). Total num frames: 538460160. Throughput: 0: 11981.7. Samples: 22082604. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:26:06,169][03423] Avg episode reward: [(0, '54.164')] -[2024-07-05 15:26:06,173][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000068172_538460160.pth... -[2024-07-05 15:26:06,249][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000066762_526909440.pth -[2024-07-05 15:26:06,781][03976] Updated weights for policy 0, policy_version 68175 (0.0011) -[2024-07-05 15:26:08,537][03976] Updated weights for policy 0, policy_version 68185 (0.0008) -[2024-07-05 15:26:10,262][03976] Updated weights for policy 0, policy_version 68195 (0.0008) -[2024-07-05 15:26:11,168][03423] Fps is (10 sec: 46694.6, 60 sec: 47650.1, 300 sec: 48346.7). Total num frames: 538689536. Throughput: 0: 11914.4. Samples: 22151368. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:26:11,169][03423] Avg episode reward: [(0, '53.707')] -[2024-07-05 15:26:11,969][03976] Updated weights for policy 0, policy_version 68205 (0.0010) -[2024-07-05 15:26:13,760][03976] Updated weights for policy 0, policy_version 68215 (0.0007) -[2024-07-05 15:26:15,845][03976] Updated weights for policy 0, policy_version 68225 (0.0011) -[2024-07-05 15:26:16,168][03423] Fps is (10 sec: 44236.6, 60 sec: 47240.5, 300 sec: 48235.6). Total num frames: 538902528. Throughput: 0: 11796.5. Samples: 22217700. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:26:16,169][03423] Avg episode reward: [(0, '56.379')] -[2024-07-05 15:26:17,697][03976] Updated weights for policy 0, policy_version 68235 (0.0008) -[2024-07-05 15:26:19,630][03976] Updated weights for policy 0, policy_version 68245 (0.0009) -[2024-07-05 15:26:21,168][03423] Fps is (10 sec: 43417.5, 60 sec: 46967.5, 300 sec: 48124.5). Total num frames: 539123712. Throughput: 0: 11724.1. Samples: 22250676. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:26:21,169][03423] Avg episode reward: [(0, '55.464')] -[2024-07-05 15:26:21,521][03976] Updated weights for policy 0, policy_version 68255 (0.0008) -[2024-07-05 15:26:23,493][03976] Updated weights for policy 0, policy_version 68265 (0.0009) -[2024-07-05 15:26:25,412][03976] Updated weights for policy 0, policy_version 68275 (0.0008) -[2024-07-05 15:26:26,167][03423] Fps is (10 sec: 43418.3, 60 sec: 46558.0, 300 sec: 48013.5). Total num frames: 539336704. Throughput: 0: 11533.0. Samples: 22314168. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:26:26,168][03423] Avg episode reward: [(0, '54.309')] -[2024-07-05 15:26:27,172][03976] Updated weights for policy 0, policy_version 68285 (0.0010) -[2024-07-05 15:26:28,889][03976] Updated weights for policy 0, policy_version 68295 (0.0008) -[2024-07-05 15:26:30,573][03976] Updated weights for policy 0, policy_version 68305 (0.0009) -[2024-07-05 15:26:31,167][03423] Fps is (10 sec: 45056.4, 60 sec: 46557.8, 300 sec: 47985.7). Total num frames: 539574272. Throughput: 0: 11492.5. Samples: 22383956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:26:31,168][03423] Avg episode reward: [(0, '54.048')] -[2024-07-05 15:26:32,292][03976] Updated weights for policy 0, policy_version 68315 (0.0007) -[2024-07-05 15:26:34,020][03976] Updated weights for policy 0, policy_version 68325 (0.0010) -[2024-07-05 15:26:35,755][03976] Updated weights for policy 0, policy_version 68335 (0.0007) -[2024-07-05 15:26:36,168][03423] Fps is (10 sec: 47512.9, 60 sec: 46421.3, 300 sec: 47957.9). Total num frames: 539811840. Throughput: 0: 11481.0. Samples: 22419692. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:26:36,169][03423] Avg episode reward: [(0, '56.287')] -[2024-07-05 15:26:37,512][03976] Updated weights for policy 0, policy_version 68345 (0.0010) -[2024-07-05 15:26:39,330][03976] Updated weights for policy 0, policy_version 68355 (0.0008) -[2024-07-05 15:26:41,088][03976] Updated weights for policy 0, policy_version 68365 (0.0007) -[2024-07-05 15:26:41,167][03423] Fps is (10 sec: 46694.3, 60 sec: 46284.8, 300 sec: 47902.4). Total num frames: 540041216. Throughput: 0: 11424.0. Samples: 22489248. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:26:41,168][03423] Avg episode reward: [(0, '57.197')] -[2024-07-05 15:26:42,983][03976] Updated weights for policy 0, policy_version 68375 (0.0014) -[2024-07-05 15:26:44,757][03976] Updated weights for policy 0, policy_version 68385 (0.0008) -[2024-07-05 15:26:46,167][03423] Fps is (10 sec: 45875.4, 60 sec: 46148.3, 300 sec: 47846.8). Total num frames: 540270592. Throughput: 0: 11354.3. Samples: 22558076. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:26:46,168][03423] Avg episode reward: [(0, '56.295')] -[2024-07-05 15:26:46,475][03976] Updated weights for policy 0, policy_version 68395 (0.0007) -[2024-07-05 15:26:48,327][03976] Updated weights for policy 0, policy_version 68405 (0.0008) -[2024-07-05 15:26:50,130][03976] Updated weights for policy 0, policy_version 68415 (0.0008) -[2024-07-05 15:26:51,168][03423] Fps is (10 sec: 45055.9, 60 sec: 45738.7, 300 sec: 47735.8). Total num frames: 540491776. Throughput: 0: 11316.5. Samples: 22591848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:26:51,168][03423] Avg episode reward: [(0, '53.432')] -[2024-07-05 15:26:52,007][03976] Updated weights for policy 0, policy_version 68425 (0.0008) -[2024-07-05 15:26:53,912][03976] Updated weights for policy 0, policy_version 68435 (0.0007) -[2024-07-05 15:26:55,869][03976] Updated weights for policy 0, policy_version 68445 (0.0011) -[2024-07-05 15:26:56,168][03423] Fps is (10 sec: 43417.2, 60 sec: 45329.0, 300 sec: 47624.7). Total num frames: 540704768. Throughput: 0: 11256.6. Samples: 22657916. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:26:56,169][03423] Avg episode reward: [(0, '54.352')] -[2024-07-05 15:26:57,599][03976] Updated weights for policy 0, policy_version 68455 (0.0008) -[2024-07-05 15:26:59,484][03976] Updated weights for policy 0, policy_version 68465 (0.0007) -[2024-07-05 15:27:01,167][03423] Fps is (10 sec: 44237.3, 60 sec: 45192.7, 300 sec: 47569.2). Total num frames: 540934144. Throughput: 0: 11267.9. Samples: 22724752. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:27:01,168][03423] Avg episode reward: [(0, '56.262')] -[2024-07-05 15:27:01,335][03976] Updated weights for policy 0, policy_version 68475 (0.0008) -[2024-07-05 15:27:03,053][03976] Updated weights for policy 0, policy_version 68485 (0.0008) -[2024-07-05 15:27:05,125][03976] Updated weights for policy 0, policy_version 68495 (0.0008) -[2024-07-05 15:27:06,168][03423] Fps is (10 sec: 45055.9, 60 sec: 44919.4, 300 sec: 47458.0). Total num frames: 541155328. Throughput: 0: 11286.1. Samples: 22758552. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:27:06,169][03423] Avg episode reward: [(0, '55.896')] -[2024-07-05 15:27:06,849][03976] Updated weights for policy 0, policy_version 68505 (0.0008) -[2024-07-05 15:27:08,800][03976] Updated weights for policy 0, policy_version 68515 (0.0011) -[2024-07-05 15:27:10,871][03976] Updated weights for policy 0, policy_version 68525 (0.0008) -[2024-07-05 15:27:11,167][03423] Fps is (10 sec: 42598.1, 60 sec: 44509.9, 300 sec: 47319.2). Total num frames: 541360128. Throughput: 0: 11303.2. Samples: 22822812. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:11,169][03423] Avg episode reward: [(0, '55.856')] -[2024-07-05 15:27:12,724][03976] Updated weights for policy 0, policy_version 68535 (0.0011) -[2024-07-05 15:27:14,633][03976] Updated weights for policy 0, policy_version 68545 (0.0008) -[2024-07-05 15:27:16,167][03423] Fps is (10 sec: 41779.6, 60 sec: 44509.9, 300 sec: 47208.1). Total num frames: 541573120. Throughput: 0: 11153.1. Samples: 22885844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:16,168][03423] Avg episode reward: [(0, '55.563')] -[2024-07-05 15:27:16,549][03976] Updated weights for policy 0, policy_version 68555 (0.0008) -[2024-07-05 15:27:18,292][03976] Updated weights for policy 0, policy_version 68565 (0.0008) -[2024-07-05 15:27:20,415][03976] Updated weights for policy 0, policy_version 68575 (0.0008) -[2024-07-05 15:27:21,168][03423] Fps is (10 sec: 43417.4, 60 sec: 44509.9, 300 sec: 47124.8). Total num frames: 541794304. Throughput: 0: 11116.5. Samples: 22919936. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:21,168][03423] Avg episode reward: [(0, '54.945')] -[2024-07-05 15:27:22,351][03976] Updated weights for policy 0, policy_version 68585 (0.0010) -[2024-07-05 15:27:24,062][03976] Updated weights for policy 0, policy_version 68595 (0.0008) -[2024-07-05 15:27:25,817][03976] Updated weights for policy 0, policy_version 68605 (0.0008) -[2024-07-05 15:27:26,168][03423] Fps is (10 sec: 45055.9, 60 sec: 44782.8, 300 sec: 47069.3). Total num frames: 542023680. Throughput: 0: 11012.3. Samples: 22984800. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:26,169][03423] Avg episode reward: [(0, '56.474')] -[2024-07-05 15:27:27,590][03976] Updated weights for policy 0, policy_version 68615 (0.0012) -[2024-07-05 15:27:29,387][03976] Updated weights for policy 0, policy_version 68625 (0.0007) -[2024-07-05 15:27:31,168][03423] Fps is (10 sec: 45055.7, 60 sec: 44509.8, 300 sec: 46986.0). Total num frames: 542244864. Throughput: 0: 11003.3. Samples: 23053228. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:31,169][03423] Avg episode reward: [(0, '54.863')] -[2024-07-05 15:27:31,304][03976] Updated weights for policy 0, policy_version 68635 (0.0011) -[2024-07-05 15:27:33,375][03976] Updated weights for policy 0, policy_version 68645 (0.0013) -[2024-07-05 15:27:35,859][03976] Updated weights for policy 0, policy_version 68655 (0.0013) -[2024-07-05 15:27:36,168][03423] Fps is (10 sec: 40140.5, 60 sec: 43554.1, 300 sec: 46736.0). Total num frames: 542425088. Throughput: 0: 10923.0. Samples: 23083384. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:36,169][03423] Avg episode reward: [(0, '54.023')] -[2024-07-05 15:27:37,640][03976] Updated weights for policy 0, policy_version 68665 (0.0008) -[2024-07-05 15:27:39,347][03976] Updated weights for policy 0, policy_version 68675 (0.0009) -[2024-07-05 15:27:41,051][03976] Updated weights for policy 0, policy_version 68685 (0.0008) -[2024-07-05 15:27:41,167][03423] Fps is (10 sec: 41779.7, 60 sec: 43690.7, 300 sec: 46708.3). Total num frames: 542662656. Throughput: 0: 10816.1. Samples: 23144640. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:41,168][03423] Avg episode reward: [(0, '53.049')] -[2024-07-05 15:27:42,979][03976] Updated weights for policy 0, policy_version 68695 (0.0010) -[2024-07-05 15:27:44,732][03976] Updated weights for policy 0, policy_version 68705 (0.0011) -[2024-07-05 15:27:46,170][03423] Fps is (10 sec: 46681.5, 60 sec: 43688.6, 300 sec: 46652.3). Total num frames: 542892032. Throughput: 0: 10849.5. Samples: 23213012. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:46,171][03423] Avg episode reward: [(0, '56.057')] -[2024-07-05 15:27:46,528][03976] Updated weights for policy 0, policy_version 68715 (0.0008) -[2024-07-05 15:27:48,262][03976] Updated weights for policy 0, policy_version 68725 (0.0008) -[2024-07-05 15:27:50,066][03976] Updated weights for policy 0, policy_version 68735 (0.0007) -[2024-07-05 15:27:51,168][03423] Fps is (10 sec: 45874.4, 60 sec: 43827.1, 300 sec: 46597.2). Total num frames: 543121408. Throughput: 0: 10885.0. Samples: 23248376. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:27:51,169][03423] Avg episode reward: [(0, '54.837')] -[2024-07-05 15:27:51,744][03976] Updated weights for policy 0, policy_version 68745 (0.0008) -[2024-07-05 15:27:53,502][03976] Updated weights for policy 0, policy_version 68755 (0.0008) -[2024-07-05 15:27:55,185][03976] Updated weights for policy 0, policy_version 68765 (0.0008) -[2024-07-05 15:27:56,168][03423] Fps is (10 sec: 46707.5, 60 sec: 44236.8, 300 sec: 46625.0). Total num frames: 543358976. Throughput: 0: 11028.0. Samples: 23319072. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:27:56,169][03423] Avg episode reward: [(0, '55.424')] -[2024-07-05 15:27:57,053][03976] Updated weights for policy 0, policy_version 68775 (0.0007) -[2024-07-05 15:27:58,984][03976] Updated weights for policy 0, policy_version 68785 (0.0008) -[2024-07-05 15:28:00,838][03976] Updated weights for policy 0, policy_version 68795 (0.0010) -[2024-07-05 15:28:01,168][03423] Fps is (10 sec: 45056.4, 60 sec: 43963.6, 300 sec: 46513.9). Total num frames: 543571968. Throughput: 0: 11104.3. Samples: 23385540. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:01,169][03423] Avg episode reward: [(0, '57.057')] -[2024-07-05 15:28:02,654][03976] Updated weights for policy 0, policy_version 68805 (0.0008) -[2024-07-05 15:28:04,630][03976] Updated weights for policy 0, policy_version 68815 (0.0008) -[2024-07-05 15:28:06,168][03423] Fps is (10 sec: 43417.6, 60 sec: 43963.8, 300 sec: 46458.3). Total num frames: 543793152. Throughput: 0: 11088.4. Samples: 23418912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:06,169][03423] Avg episode reward: [(0, '54.419')] -[2024-07-05 15:28:06,173][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000068823_543793152.pth... -[2024-07-05 15:28:06,245][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000067470_532709376.pth -[2024-07-05 15:28:06,510][03976] Updated weights for policy 0, policy_version 68825 (0.0010) -[2024-07-05 15:28:08,359][03976] Updated weights for policy 0, policy_version 68835 (0.0008) -[2024-07-05 15:28:10,210][03976] Updated weights for policy 0, policy_version 68845 (0.0008) -[2024-07-05 15:28:11,168][03423] Fps is (10 sec: 44236.3, 60 sec: 44236.7, 300 sec: 46402.8). Total num frames: 544014336. Throughput: 0: 11070.5. Samples: 23482976. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:11,169][03423] Avg episode reward: [(0, '53.533')] -[2024-07-05 15:28:11,945][03976] Updated weights for policy 0, policy_version 68855 (0.0008) -[2024-07-05 15:28:13,721][03976] Updated weights for policy 0, policy_version 68865 (0.0008) -[2024-07-05 15:28:15,436][03976] Updated weights for policy 0, policy_version 68875 (0.0010) -[2024-07-05 15:28:16,168][03423] Fps is (10 sec: 45874.9, 60 sec: 44646.3, 300 sec: 46402.8). Total num frames: 544251904. Throughput: 0: 11114.8. Samples: 23553396. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:16,169][03423] Avg episode reward: [(0, '55.332')] -[2024-07-05 15:28:17,163][03976] Updated weights for policy 0, policy_version 68885 (0.0008) -[2024-07-05 15:28:18,957][03976] Updated weights for policy 0, policy_version 68895 (0.0008) -[2024-07-05 15:28:20,657][03976] Updated weights for policy 0, policy_version 68905 (0.0012) -[2024-07-05 15:28:21,167][03423] Fps is (10 sec: 46695.3, 60 sec: 44783.0, 300 sec: 46375.1). Total num frames: 544481280. Throughput: 0: 11218.2. Samples: 23588204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:21,168][03423] Avg episode reward: [(0, '54.872')] -[2024-07-05 15:28:22,418][03976] Updated weights for policy 0, policy_version 68915 (0.0007) -[2024-07-05 15:28:24,197][03976] Updated weights for policy 0, policy_version 68925 (0.0011) -[2024-07-05 15:28:25,953][03976] Updated weights for policy 0, policy_version 68935 (0.0010) -[2024-07-05 15:28:26,168][03423] Fps is (10 sec: 46694.6, 60 sec: 44919.4, 300 sec: 46375.0). Total num frames: 544718848. Throughput: 0: 11424.1. Samples: 23658724. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:26,169][03423] Avg episode reward: [(0, '55.391')] -[2024-07-05 15:28:27,675][03976] Updated weights for policy 0, policy_version 68945 (0.0007) -[2024-07-05 15:28:29,401][03976] Updated weights for policy 0, policy_version 68955 (0.0007) -[2024-07-05 15:28:31,168][03976] Updated weights for policy 0, policy_version 68965 (0.0008) -[2024-07-05 15:28:31,167][03423] Fps is (10 sec: 47513.9, 60 sec: 45192.7, 300 sec: 46347.3). Total num frames: 544956416. Throughput: 0: 11471.7. Samples: 23729204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:28:31,169][03423] Avg episode reward: [(0, '55.822')] -[2024-07-05 15:28:32,882][03976] Updated weights for policy 0, policy_version 68975 (0.0007) -[2024-07-05 15:28:34,604][03976] Updated weights for policy 0, policy_version 68985 (0.0011) -[2024-07-05 15:28:36,167][03423] Fps is (10 sec: 46694.7, 60 sec: 46011.8, 300 sec: 46319.5). Total num frames: 545185792. Throughput: 0: 11475.4. Samples: 23764768. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:28:36,168][03423] Avg episode reward: [(0, '56.523')] -[2024-07-05 15:28:36,335][03976] Updated weights for policy 0, policy_version 68995 (0.0008) -[2024-07-05 15:28:38,119][03976] Updated weights for policy 0, policy_version 69005 (0.0008) -[2024-07-05 15:28:39,837][03976] Updated weights for policy 0, policy_version 69015 (0.0007) -[2024-07-05 15:28:41,167][03423] Fps is (10 sec: 46694.6, 60 sec: 46011.8, 300 sec: 46347.3). Total num frames: 545423360. Throughput: 0: 11475.9. Samples: 23835484. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:28:41,168][03423] Avg episode reward: [(0, '54.881')] -[2024-07-05 15:28:41,525][03976] Updated weights for policy 0, policy_version 69025 (0.0007) -[2024-07-05 15:28:43,302][03976] Updated weights for policy 0, policy_version 69035 (0.0012) -[2024-07-05 15:28:45,038][03976] Updated weights for policy 0, policy_version 69045 (0.0008) -[2024-07-05 15:28:46,168][03423] Fps is (10 sec: 47512.8, 60 sec: 46150.3, 300 sec: 46347.3). Total num frames: 545660928. Throughput: 0: 11582.9. Samples: 23906772. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:28:46,169][03423] Avg episode reward: [(0, '55.764')] -[2024-07-05 15:28:46,758][03976] Updated weights for policy 0, policy_version 69055 (0.0009) -[2024-07-05 15:28:48,460][03976] Updated weights for policy 0, policy_version 69065 (0.0008) -[2024-07-05 15:28:50,135][03976] Updated weights for policy 0, policy_version 69075 (0.0008) -[2024-07-05 15:28:51,167][03423] Fps is (10 sec: 48332.2, 60 sec: 46421.5, 300 sec: 46375.1). Total num frames: 545906688. Throughput: 0: 11638.6. Samples: 23942648. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:28:51,168][03423] Avg episode reward: [(0, '52.821')] -[2024-07-05 15:28:51,841][03976] Updated weights for policy 0, policy_version 69085 (0.0011) -[2024-07-05 15:28:53,604][03976] Updated weights for policy 0, policy_version 69095 (0.0007) -[2024-07-05 15:28:55,333][03976] Updated weights for policy 0, policy_version 69105 (0.0008) -[2024-07-05 15:28:56,167][03423] Fps is (10 sec: 48333.5, 60 sec: 46421.4, 300 sec: 46347.3). Total num frames: 546144256. Throughput: 0: 11802.4. Samples: 24014084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:28:56,169][03423] Avg episode reward: [(0, '53.889')] -[2024-07-05 15:28:57,110][03976] Updated weights for policy 0, policy_version 69115 (0.0007) -[2024-07-05 15:28:58,850][03976] Updated weights for policy 0, policy_version 69125 (0.0008) -[2024-07-05 15:29:00,644][03976] Updated weights for policy 0, policy_version 69135 (0.0008) -[2024-07-05 15:29:01,168][03423] Fps is (10 sec: 46694.3, 60 sec: 46694.4, 300 sec: 46319.5). Total num frames: 546373632. Throughput: 0: 11799.0. Samples: 24084352. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:29:01,169][03423] Avg episode reward: [(0, '54.327')] -[2024-07-05 15:29:02,362][03976] Updated weights for policy 0, policy_version 69145 (0.0015) -[2024-07-05 15:29:04,090][03976] Updated weights for policy 0, policy_version 69155 (0.0008) -[2024-07-05 15:29:05,870][03976] Updated weights for policy 0, policy_version 69165 (0.0008) -[2024-07-05 15:29:06,167][03423] Fps is (10 sec: 45875.3, 60 sec: 46831.0, 300 sec: 46291.7). Total num frames: 546603008. Throughput: 0: 11800.8. Samples: 24119240. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:29:06,169][03423] Avg episode reward: [(0, '55.924')] -[2024-07-05 15:29:07,583][03976] Updated weights for policy 0, policy_version 69175 (0.0013) -[2024-07-05 15:29:09,354][03976] Updated weights for policy 0, policy_version 69185 (0.0008) -[2024-07-05 15:29:11,010][03976] Updated weights for policy 0, policy_version 69195 (0.0009) -[2024-07-05 15:29:11,168][03423] Fps is (10 sec: 46694.3, 60 sec: 47104.1, 300 sec: 46319.5). Total num frames: 546840576. Throughput: 0: 11795.6. Samples: 24189524. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:29:11,168][03423] Avg episode reward: [(0, '56.601')] -[2024-07-05 15:29:12,793][03976] Updated weights for policy 0, policy_version 69205 (0.0008) -[2024-07-05 15:29:14,741][03976] Updated weights for policy 0, policy_version 69215 (0.0014) -[2024-07-05 15:29:16,168][03423] Fps is (10 sec: 46694.3, 60 sec: 46967.5, 300 sec: 46291.7). Total num frames: 547069952. Throughput: 0: 11745.0. Samples: 24257732. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:16,169][03423] Avg episode reward: [(0, '56.641')] -[2024-07-05 15:29:16,505][03976] Updated weights for policy 0, policy_version 69225 (0.0010) -[2024-07-05 15:29:18,249][03976] Updated weights for policy 0, policy_version 69235 (0.0007) -[2024-07-05 15:29:19,944][03976] Updated weights for policy 0, policy_version 69245 (0.0008) -[2024-07-05 15:29:21,167][03423] Fps is (10 sec: 46694.8, 60 sec: 47104.0, 300 sec: 46264.0). Total num frames: 547307520. Throughput: 0: 11745.3. Samples: 24293304. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:21,168][03423] Avg episode reward: [(0, '54.081')] -[2024-07-05 15:29:21,688][03976] Updated weights for policy 0, policy_version 69255 (0.0008) -[2024-07-05 15:29:23,424][03976] Updated weights for policy 0, policy_version 69265 (0.0012) -[2024-07-05 15:29:25,176][03976] Updated weights for policy 0, policy_version 69275 (0.0007) -[2024-07-05 15:29:26,168][03423] Fps is (10 sec: 46694.2, 60 sec: 46967.5, 300 sec: 46208.4). Total num frames: 547536896. Throughput: 0: 11755.0. Samples: 24364460. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:26,169][03423] Avg episode reward: [(0, '55.838')] -[2024-07-05 15:29:26,930][03976] Updated weights for policy 0, policy_version 69285 (0.0010) -[2024-07-05 15:29:28,667][03976] Updated weights for policy 0, policy_version 69295 (0.0009) -[2024-07-05 15:29:30,362][03976] Updated weights for policy 0, policy_version 69305 (0.0008) -[2024-07-05 15:29:31,168][03423] Fps is (10 sec: 46693.9, 60 sec: 46967.4, 300 sec: 46180.7). Total num frames: 547774464. Throughput: 0: 11741.3. Samples: 24435128. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:31,168][03423] Avg episode reward: [(0, '53.265')] -[2024-07-05 15:29:32,074][03976] Updated weights for policy 0, policy_version 69315 (0.0008) -[2024-07-05 15:29:33,779][03976] Updated weights for policy 0, policy_version 69325 (0.0008) -[2024-07-05 15:29:35,491][03976] Updated weights for policy 0, policy_version 69335 (0.0007) -[2024-07-05 15:29:36,168][03423] Fps is (10 sec: 47513.8, 60 sec: 47104.0, 300 sec: 46152.9). Total num frames: 548012032. Throughput: 0: 11738.4. Samples: 24470876. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:36,168][03423] Avg episode reward: [(0, '55.666')] -[2024-07-05 15:29:37,252][03976] Updated weights for policy 0, policy_version 69345 (0.0008) -[2024-07-05 15:29:38,982][03976] Updated weights for policy 0, policy_version 69355 (0.0008) -[2024-07-05 15:29:40,656][03976] Updated weights for policy 0, policy_version 69365 (0.0008) -[2024-07-05 15:29:41,168][03423] Fps is (10 sec: 47513.5, 60 sec: 47103.8, 300 sec: 46152.9). Total num frames: 548249600. Throughput: 0: 11718.6. Samples: 24541424. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:41,169][03423] Avg episode reward: [(0, '55.380')] -[2024-07-05 15:29:42,413][03976] Updated weights for policy 0, policy_version 69375 (0.0008) -[2024-07-05 15:29:44,137][03976] Updated weights for policy 0, policy_version 69385 (0.0007) -[2024-07-05 15:29:45,852][03976] Updated weights for policy 0, policy_version 69395 (0.0007) -[2024-07-05 15:29:46,168][03423] Fps is (10 sec: 47513.6, 60 sec: 47104.1, 300 sec: 46152.9). Total num frames: 548487168. Throughput: 0: 11750.0. Samples: 24613104. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:46,169][03423] Avg episode reward: [(0, '57.042')] -[2024-07-05 15:29:47,610][03976] Updated weights for policy 0, policy_version 69405 (0.0007) -[2024-07-05 15:29:49,346][03976] Updated weights for policy 0, policy_version 69415 (0.0008) -[2024-07-05 15:29:51,100][03976] Updated weights for policy 0, policy_version 69425 (0.0008) -[2024-07-05 15:29:51,168][03423] Fps is (10 sec: 47512.7, 60 sec: 46967.3, 300 sec: 46152.9). Total num frames: 548724736. Throughput: 0: 11764.0. Samples: 24648624. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:51,172][03423] Avg episode reward: [(0, '53.635')] -[2024-07-05 15:29:52,846][03976] Updated weights for policy 0, policy_version 69435 (0.0008) -[2024-07-05 15:29:54,589][03976] Updated weights for policy 0, policy_version 69445 (0.0012) -[2024-07-05 15:29:56,168][03423] Fps is (10 sec: 47513.6, 60 sec: 46967.5, 300 sec: 46152.9). Total num frames: 548962304. Throughput: 0: 11772.2. Samples: 24719272. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:29:56,169][03423] Avg episode reward: [(0, '54.925')] -[2024-07-05 15:29:56,313][03976] Updated weights for policy 0, policy_version 69455 (0.0008) -[2024-07-05 15:29:58,027][03976] Updated weights for policy 0, policy_version 69465 (0.0007) -[2024-07-05 15:29:59,752][03976] Updated weights for policy 0, policy_version 69475 (0.0008) -[2024-07-05 15:30:01,167][03423] Fps is (10 sec: 46695.9, 60 sec: 46967.5, 300 sec: 46125.1). Total num frames: 549191680. Throughput: 0: 11805.5. Samples: 24788980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:01,168][03423] Avg episode reward: [(0, '55.405')] -[2024-07-05 15:30:01,553][03976] Updated weights for policy 0, policy_version 69485 (0.0009) -[2024-07-05 15:30:03,353][03976] Updated weights for policy 0, policy_version 69495 (0.0011) -[2024-07-05 15:30:05,117][03976] Updated weights for policy 0, policy_version 69505 (0.0008) -[2024-07-05 15:30:06,168][03423] Fps is (10 sec: 45874.6, 60 sec: 46967.4, 300 sec: 46069.6). Total num frames: 549421056. Throughput: 0: 11783.2. Samples: 24823548. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:06,169][03423] Avg episode reward: [(0, '56.285')] -[2024-07-05 15:30:06,220][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000069511_549429248.pth... -[2024-07-05 15:30:06,292][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000068172_538460160.pth -[2024-07-05 15:30:06,918][03976] Updated weights for policy 0, policy_version 69515 (0.0008) -[2024-07-05 15:30:08,651][03976] Updated weights for policy 0, policy_version 69525 (0.0010) -[2024-07-05 15:30:10,352][03976] Updated weights for policy 0, policy_version 69535 (0.0008) -[2024-07-05 15:30:11,167][03423] Fps is (10 sec: 46694.2, 60 sec: 46967.5, 300 sec: 46069.6). Total num frames: 549658624. Throughput: 0: 11771.3. Samples: 24894168. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:11,168][03423] Avg episode reward: [(0, '55.550')] -[2024-07-05 15:30:12,057][03976] Updated weights for policy 0, policy_version 69545 (0.0007) -[2024-07-05 15:30:13,782][03976] Updated weights for policy 0, policy_version 69555 (0.0007) -[2024-07-05 15:30:15,439][03976] Updated weights for policy 0, policy_version 69565 (0.0010) -[2024-07-05 15:30:16,167][03423] Fps is (10 sec: 48333.4, 60 sec: 47240.5, 300 sec: 46097.4). Total num frames: 549904384. Throughput: 0: 11798.7. Samples: 24966068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:16,169][03423] Avg episode reward: [(0, '54.870')] -[2024-07-05 15:30:17,170][03976] Updated weights for policy 0, policy_version 69575 (0.0008) -[2024-07-05 15:30:18,929][03976] Updated weights for policy 0, policy_version 69585 (0.0009) -[2024-07-05 15:30:20,629][03976] Updated weights for policy 0, policy_version 69595 (0.0010) -[2024-07-05 15:30:21,168][03423] Fps is (10 sec: 48332.8, 60 sec: 47240.5, 300 sec: 46097.4). Total num frames: 550141952. Throughput: 0: 11807.2. Samples: 25002200. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:21,169][03423] Avg episode reward: [(0, '54.771')] -[2024-07-05 15:30:22,339][03976] Updated weights for policy 0, policy_version 69605 (0.0007) -[2024-07-05 15:30:24,046][03976] Updated weights for policy 0, policy_version 69615 (0.0007) -[2024-07-05 15:30:25,719][03976] Updated weights for policy 0, policy_version 69625 (0.0008) -[2024-07-05 15:30:26,168][03423] Fps is (10 sec: 47513.3, 60 sec: 47377.1, 300 sec: 46097.3). Total num frames: 550379520. Throughput: 0: 11833.1. Samples: 25073912. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:26,169][03423] Avg episode reward: [(0, '54.838')] -[2024-07-05 15:30:27,440][03976] Updated weights for policy 0, policy_version 69635 (0.0009) -[2024-07-05 15:30:29,149][03976] Updated weights for policy 0, policy_version 69645 (0.0008) -[2024-07-05 15:30:30,886][03976] Updated weights for policy 0, policy_version 69655 (0.0008) -[2024-07-05 15:30:31,168][03423] Fps is (10 sec: 47513.2, 60 sec: 47377.0, 300 sec: 46069.6). Total num frames: 550617088. Throughput: 0: 11835.6. Samples: 25145708. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:31,169][03423] Avg episode reward: [(0, '56.047')] -[2024-07-05 15:30:32,617][03976] Updated weights for policy 0, policy_version 69665 (0.0011) -[2024-07-05 15:30:34,324][03976] Updated weights for policy 0, policy_version 69675 (0.0010) -[2024-07-05 15:30:36,058][03976] Updated weights for policy 0, policy_version 69685 (0.0008) -[2024-07-05 15:30:36,168][03423] Fps is (10 sec: 47513.3, 60 sec: 47377.0, 300 sec: 46069.6). Total num frames: 550854656. Throughput: 0: 11831.4. Samples: 25181036. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:36,169][03423] Avg episode reward: [(0, '55.709')] -[2024-07-05 15:30:37,777][03976] Updated weights for policy 0, policy_version 69695 (0.0008) -[2024-07-05 15:30:39,499][03976] Updated weights for policy 0, policy_version 69705 (0.0008) -[2024-07-05 15:30:41,168][03423] Fps is (10 sec: 47513.5, 60 sec: 47377.0, 300 sec: 46069.6). Total num frames: 551092224. Throughput: 0: 11850.7. Samples: 25252556. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:30:41,177][03423] Avg episode reward: [(0, '54.682')] -[2024-07-05 15:30:41,236][03976] Updated weights for policy 0, policy_version 69715 (0.0010) -[2024-07-05 15:30:42,936][03976] Updated weights for policy 0, policy_version 69725 (0.0007) -[2024-07-05 15:30:44,604][03976] Updated weights for policy 0, policy_version 69735 (0.0008) -[2024-07-05 15:30:46,168][03423] Fps is (10 sec: 48333.3, 60 sec: 47513.6, 300 sec: 46069.6). Total num frames: 551337984. Throughput: 0: 11905.2. Samples: 25324716. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:30:46,168][03423] Avg episode reward: [(0, '56.456')] -[2024-07-05 15:30:46,315][03976] Updated weights for policy 0, policy_version 69745 (0.0008) -[2024-07-05 15:30:48,052][03976] Updated weights for policy 0, policy_version 69755 (0.0008) -[2024-07-05 15:30:49,761][03976] Updated weights for policy 0, policy_version 69765 (0.0008) -[2024-07-05 15:30:51,168][03423] Fps is (10 sec: 48333.2, 60 sec: 47513.8, 300 sec: 46069.6). Total num frames: 551575552. Throughput: 0: 11935.7. Samples: 25360652. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:30:51,168][03423] Avg episode reward: [(0, '54.104')] -[2024-07-05 15:30:51,482][03976] Updated weights for policy 0, policy_version 69775 (0.0007) -[2024-07-05 15:30:53,180][03976] Updated weights for policy 0, policy_version 69785 (0.0008) -[2024-07-05 15:30:54,887][03976] Updated weights for policy 0, policy_version 69795 (0.0008) -[2024-07-05 15:30:56,168][03423] Fps is (10 sec: 47513.6, 60 sec: 47513.6, 300 sec: 46069.6). Total num frames: 551813120. Throughput: 0: 11959.4. Samples: 25432340. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:30:56,169][03423] Avg episode reward: [(0, '56.044')] -[2024-07-05 15:30:56,617][03976] Updated weights for policy 0, policy_version 69805 (0.0007) -[2024-07-05 15:30:58,308][03976] Updated weights for policy 0, policy_version 69815 (0.0008) -[2024-07-05 15:31:00,051][03976] Updated weights for policy 0, policy_version 69825 (0.0007) -[2024-07-05 15:31:01,168][03423] Fps is (10 sec: 47513.6, 60 sec: 47650.1, 300 sec: 46069.6). Total num frames: 552050688. Throughput: 0: 11947.9. Samples: 25503724. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:01,168][03423] Avg episode reward: [(0, '55.153')] -[2024-07-05 15:31:01,745][03976] Updated weights for policy 0, policy_version 69835 (0.0008) -[2024-07-05 15:31:03,453][03976] Updated weights for policy 0, policy_version 69845 (0.0007) -[2024-07-05 15:31:05,167][03976] Updated weights for policy 0, policy_version 69855 (0.0008) -[2024-07-05 15:31:06,168][03423] Fps is (10 sec: 48332.0, 60 sec: 47923.2, 300 sec: 46125.1). Total num frames: 552296448. Throughput: 0: 11942.2. Samples: 25539600. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:06,169][03423] Avg episode reward: [(0, '54.799')] -[2024-07-05 15:31:06,904][03976] Updated weights for policy 0, policy_version 69865 (0.0009) -[2024-07-05 15:31:08,626][03976] Updated weights for policy 0, policy_version 69875 (0.0008) -[2024-07-05 15:31:10,300][03976] Updated weights for policy 0, policy_version 69885 (0.0008) -[2024-07-05 15:31:11,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47786.6, 300 sec: 46180.7). Total num frames: 552525824. Throughput: 0: 11944.2. Samples: 25611400. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:11,177][03423] Avg episode reward: [(0, '56.256')] -[2024-07-05 15:31:11,963][03976] Updated weights for policy 0, policy_version 69895 (0.0008) -[2024-07-05 15:31:13,679][03976] Updated weights for policy 0, policy_version 69905 (0.0008) -[2024-07-05 15:31:15,400][03976] Updated weights for policy 0, policy_version 69915 (0.0007) -[2024-07-05 15:31:16,167][03423] Fps is (10 sec: 47514.7, 60 sec: 47786.7, 300 sec: 46264.0). Total num frames: 552771584. Throughput: 0: 11950.8. Samples: 25683492. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:16,169][03423] Avg episode reward: [(0, '53.371')] -[2024-07-05 15:31:17,152][03976] Updated weights for policy 0, policy_version 69925 (0.0008) -[2024-07-05 15:31:18,855][03976] Updated weights for policy 0, policy_version 69935 (0.0008) -[2024-07-05 15:31:20,550][03976] Updated weights for policy 0, policy_version 69945 (0.0008) -[2024-07-05 15:31:21,168][03423] Fps is (10 sec: 48332.8, 60 sec: 47786.6, 300 sec: 46347.3). Total num frames: 553009152. Throughput: 0: 11962.2. Samples: 25719332. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:21,169][03423] Avg episode reward: [(0, '52.658')] -[2024-07-05 15:31:22,302][03976] Updated weights for policy 0, policy_version 69955 (0.0008) -[2024-07-05 15:31:24,003][03976] Updated weights for policy 0, policy_version 69965 (0.0010) -[2024-07-05 15:31:25,702][03976] Updated weights for policy 0, policy_version 69975 (0.0008) -[2024-07-05 15:31:26,168][03423] Fps is (10 sec: 47513.1, 60 sec: 47786.7, 300 sec: 46347.3). Total num frames: 553246720. Throughput: 0: 11967.0. Samples: 25791072. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:26,169][03423] Avg episode reward: [(0, '55.008')] -[2024-07-05 15:31:27,381][03976] Updated weights for policy 0, policy_version 69985 (0.0008) -[2024-07-05 15:31:29,077][03976] Updated weights for policy 0, policy_version 69995 (0.0007) -[2024-07-05 15:31:30,784][03976] Updated weights for policy 0, policy_version 70005 (0.0012) -[2024-07-05 15:31:31,168][03423] Fps is (10 sec: 48332.9, 60 sec: 47923.2, 300 sec: 46375.1). Total num frames: 553492480. Throughput: 0: 11972.5. Samples: 25863480. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:31,169][03423] Avg episode reward: [(0, '56.528')] -[2024-07-05 15:31:32,482][03976] Updated weights for policy 0, policy_version 70015 (0.0007) -[2024-07-05 15:31:34,192][03976] Updated weights for policy 0, policy_version 70025 (0.0010) -[2024-07-05 15:31:35,914][03976] Updated weights for policy 0, policy_version 70035 (0.0011) -[2024-07-05 15:31:36,168][03423] Fps is (10 sec: 48333.0, 60 sec: 47923.3, 300 sec: 46402.8). Total num frames: 553730048. Throughput: 0: 11975.2. Samples: 25899536. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:36,169][03423] Avg episode reward: [(0, '55.881')] -[2024-07-05 15:31:37,590][03976] Updated weights for policy 0, policy_version 70045 (0.0010) -[2024-07-05 15:31:39,326][03976] Updated weights for policy 0, policy_version 70055 (0.0014) -[2024-07-05 15:31:41,034][03976] Updated weights for policy 0, policy_version 70065 (0.0008) -[2024-07-05 15:31:41,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48059.8, 300 sec: 46458.4). Total num frames: 553975808. Throughput: 0: 11980.9. Samples: 25971480. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:41,169][03423] Avg episode reward: [(0, '52.700')] -[2024-07-05 15:31:42,760][03976] Updated weights for policy 0, policy_version 70075 (0.0007) -[2024-07-05 15:31:44,455][03976] Updated weights for policy 0, policy_version 70085 (0.0010) -[2024-07-05 15:31:46,137][03976] Updated weights for policy 0, policy_version 70095 (0.0008) -[2024-07-05 15:31:46,168][03423] Fps is (10 sec: 48332.9, 60 sec: 47923.2, 300 sec: 46513.9). Total num frames: 554213376. Throughput: 0: 11989.1. Samples: 26043232. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:46,169][03423] Avg episode reward: [(0, '55.701')] -[2024-07-05 15:31:47,878][03976] Updated weights for policy 0, policy_version 70105 (0.0008) -[2024-07-05 15:31:49,575][03976] Updated weights for policy 0, policy_version 70115 (0.0007) -[2024-07-05 15:31:51,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47923.2, 300 sec: 46597.2). Total num frames: 554450944. Throughput: 0: 11994.3. Samples: 26079340. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:51,168][03423] Avg episode reward: [(0, '54.751')] -[2024-07-05 15:31:51,273][03976] Updated weights for policy 0, policy_version 70125 (0.0008) -[2024-07-05 15:31:53,040][03976] Updated weights for policy 0, policy_version 70135 (0.0009) -[2024-07-05 15:31:54,740][03976] Updated weights for policy 0, policy_version 70145 (0.0007) -[2024-07-05 15:31:56,168][03423] Fps is (10 sec: 47513.2, 60 sec: 47923.1, 300 sec: 46624.9). Total num frames: 554688512. Throughput: 0: 11984.9. Samples: 26150720. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:31:56,169][03423] Avg episode reward: [(0, '54.932')] -[2024-07-05 15:31:56,465][03976] Updated weights for policy 0, policy_version 70155 (0.0009) -[2024-07-05 15:31:58,156][03976] Updated weights for policy 0, policy_version 70165 (0.0008) -[2024-07-05 15:31:59,890][03976] Updated weights for policy 0, policy_version 70175 (0.0009) -[2024-07-05 15:32:01,168][03423] Fps is (10 sec: 47513.7, 60 sec: 47923.2, 300 sec: 46680.5). Total num frames: 554926080. Throughput: 0: 11972.8. Samples: 26222268. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:32:01,169][03423] Avg episode reward: [(0, '56.965')] -[2024-07-05 15:32:01,630][03976] Updated weights for policy 0, policy_version 70185 (0.0008) -[2024-07-05 15:32:03,356][03976] Updated weights for policy 0, policy_version 70195 (0.0011) -[2024-07-05 15:32:05,024][03976] Updated weights for policy 0, policy_version 70205 (0.0009) -[2024-07-05 15:32:06,167][03423] Fps is (10 sec: 47514.1, 60 sec: 47786.8, 300 sec: 46791.6). Total num frames: 555163648. Throughput: 0: 11972.7. Samples: 26258104. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:32:06,168][03423] Avg episode reward: [(0, '55.513')] -[2024-07-05 15:32:06,172][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000070211_555163648.pth... -[2024-07-05 15:32:06,242][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000068823_543793152.pth -[2024-07-05 15:32:06,809][03976] Updated weights for policy 0, policy_version 70215 (0.0007) -[2024-07-05 15:32:08,479][03976] Updated weights for policy 0, policy_version 70225 (0.0008) -[2024-07-05 15:32:10,193][03976] Updated weights for policy 0, policy_version 70235 (0.0011) -[2024-07-05 15:32:11,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47923.2, 300 sec: 46874.9). Total num frames: 555401216. Throughput: 0: 11963.7. Samples: 26329440. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:11,169][03423] Avg episode reward: [(0, '53.737')] -[2024-07-05 15:32:11,905][03976] Updated weights for policy 0, policy_version 70245 (0.0007) -[2024-07-05 15:32:13,619][03976] Updated weights for policy 0, policy_version 70255 (0.0008) -[2024-07-05 15:32:15,328][03976] Updated weights for policy 0, policy_version 70265 (0.0007) -[2024-07-05 15:32:16,167][03423] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 46958.2). Total num frames: 555646976. Throughput: 0: 11956.5. Samples: 26401524. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:16,168][03423] Avg episode reward: [(0, '52.701')] -[2024-07-05 15:32:17,017][03976] Updated weights for policy 0, policy_version 70275 (0.0007) -[2024-07-05 15:32:18,752][03976] Updated weights for policy 0, policy_version 70285 (0.0010) -[2024-07-05 15:32:20,450][03976] Updated weights for policy 0, policy_version 70295 (0.0008) -[2024-07-05 15:32:21,167][03423] Fps is (10 sec: 48333.0, 60 sec: 47923.2, 300 sec: 46986.0). Total num frames: 555884544. Throughput: 0: 11955.4. Samples: 26437528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:21,169][03423] Avg episode reward: [(0, '54.390')] -[2024-07-05 15:32:22,134][03976] Updated weights for policy 0, policy_version 70305 (0.0008) -[2024-07-05 15:32:23,844][03976] Updated weights for policy 0, policy_version 70315 (0.0007) -[2024-07-05 15:32:25,555][03976] Updated weights for policy 0, policy_version 70325 (0.0008) -[2024-07-05 15:32:26,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47923.2, 300 sec: 47041.5). Total num frames: 556122112. Throughput: 0: 11955.9. Samples: 26509496. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:26,169][03423] Avg episode reward: [(0, '56.005')] -[2024-07-05 15:32:27,319][03976] Updated weights for policy 0, policy_version 70335 (0.0008) -[2024-07-05 15:32:28,993][03976] Updated weights for policy 0, policy_version 70345 (0.0007) -[2024-07-05 15:32:30,733][03976] Updated weights for policy 0, policy_version 70355 (0.0008) -[2024-07-05 15:32:31,168][03423] Fps is (10 sec: 47513.5, 60 sec: 47786.7, 300 sec: 47235.9). Total num frames: 556359680. Throughput: 0: 11956.0. Samples: 26581252. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:31,169][03423] Avg episode reward: [(0, '53.848')] -[2024-07-05 15:32:32,434][03976] Updated weights for policy 0, policy_version 70365 (0.0008) -[2024-07-05 15:32:34,099][03976] Updated weights for policy 0, policy_version 70375 (0.0008) -[2024-07-05 15:32:35,835][03976] Updated weights for policy 0, policy_version 70385 (0.0007) -[2024-07-05 15:32:36,168][03423] Fps is (10 sec: 48332.5, 60 sec: 47923.2, 300 sec: 47263.7). Total num frames: 556605440. Throughput: 0: 11952.1. Samples: 26617184. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:36,169][03423] Avg episode reward: [(0, '56.502')] -[2024-07-05 15:32:37,520][03976] Updated weights for policy 0, policy_version 70395 (0.0010) -[2024-07-05 15:32:39,282][03976] Updated weights for policy 0, policy_version 70405 (0.0008) -[2024-07-05 15:32:41,019][03976] Updated weights for policy 0, policy_version 70415 (0.0010) -[2024-07-05 15:32:41,168][03423] Fps is (10 sec: 48332.4, 60 sec: 47786.6, 300 sec: 47291.9). Total num frames: 556843008. Throughput: 0: 11960.9. Samples: 26688960. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:41,169][03423] Avg episode reward: [(0, '56.257')] -[2024-07-05 15:32:42,714][03976] Updated weights for policy 0, policy_version 70425 (0.0007) -[2024-07-05 15:32:44,418][03976] Updated weights for policy 0, policy_version 70435 (0.0007) -[2024-07-05 15:32:46,118][03976] Updated weights for policy 0, policy_version 70445 (0.0007) -[2024-07-05 15:32:46,168][03423] Fps is (10 sec: 47513.8, 60 sec: 47786.6, 300 sec: 47319.2). Total num frames: 557080576. Throughput: 0: 11966.3. Samples: 26760752. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:32:46,168][03423] Avg episode reward: [(0, '52.241')] -[2024-07-05 15:32:47,864][03976] Updated weights for policy 0, policy_version 70455 (0.0007) -[2024-07-05 15:32:49,562][03976] Updated weights for policy 0, policy_version 70465 (0.0008) -[2024-07-05 15:32:51,168][03423] Fps is (10 sec: 47513.6, 60 sec: 47786.6, 300 sec: 47319.2). Total num frames: 557318144. Throughput: 0: 11960.4. Samples: 26796324. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:32:51,169][03423] Avg episode reward: [(0, '56.953')] -[2024-07-05 15:32:51,270][03976] Updated weights for policy 0, policy_version 70475 (0.0011) -[2024-07-05 15:32:52,972][03976] Updated weights for policy 0, policy_version 70485 (0.0008) -[2024-07-05 15:32:54,629][03976] Updated weights for policy 0, policy_version 70495 (0.0007) -[2024-07-05 15:32:56,168][03423] Fps is (10 sec: 47513.6, 60 sec: 47786.7, 300 sec: 47402.5). Total num frames: 557555712. Throughput: 0: 11971.8. Samples: 26868172. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:32:56,169][03423] Avg episode reward: [(0, '55.075')] -[2024-07-05 15:32:56,373][03976] Updated weights for policy 0, policy_version 70505 (0.0010) -[2024-07-05 15:32:58,120][03976] Updated weights for policy 0, policy_version 70515 (0.0008) -[2024-07-05 15:32:59,851][03976] Updated weights for policy 0, policy_version 70525 (0.0007) -[2024-07-05 15:33:01,167][03423] Fps is (10 sec: 47514.1, 60 sec: 47786.7, 300 sec: 47458.1). Total num frames: 557793280. Throughput: 0: 11962.1. Samples: 26939820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:01,168][03423] Avg episode reward: [(0, '56.120')] -[2024-07-05 15:33:01,545][03976] Updated weights for policy 0, policy_version 70535 (0.0009) -[2024-07-05 15:33:03,257][03976] Updated weights for policy 0, policy_version 70545 (0.0008) -[2024-07-05 15:33:04,965][03976] Updated weights for policy 0, policy_version 70555 (0.0008) -[2024-07-05 15:33:06,168][03423] Fps is (10 sec: 48332.9, 60 sec: 47923.2, 300 sec: 47541.4). Total num frames: 558039040. Throughput: 0: 11962.1. Samples: 26975824. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:06,169][03423] Avg episode reward: [(0, '55.913')] -[2024-07-05 15:33:06,686][03976] Updated weights for policy 0, policy_version 70565 (0.0010) -[2024-07-05 15:33:08,407][03976] Updated weights for policy 0, policy_version 70575 (0.0007) -[2024-07-05 15:33:10,120][03976] Updated weights for policy 0, policy_version 70585 (0.0008) -[2024-07-05 15:33:11,167][03423] Fps is (10 sec: 48332.8, 60 sec: 47923.2, 300 sec: 47541.4). Total num frames: 558276608. Throughput: 0: 11961.3. Samples: 27047756. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:11,169][03423] Avg episode reward: [(0, '54.648')] -[2024-07-05 15:33:11,765][03976] Updated weights for policy 0, policy_version 70595 (0.0009) -[2024-07-05 15:33:13,511][03976] Updated weights for policy 0, policy_version 70605 (0.0008) -[2024-07-05 15:33:15,215][03976] Updated weights for policy 0, policy_version 70615 (0.0008) -[2024-07-05 15:33:16,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47786.6, 300 sec: 47569.1). Total num frames: 558514176. Throughput: 0: 11964.1. Samples: 27119636. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:16,169][03423] Avg episode reward: [(0, '54.929')] -[2024-07-05 15:33:16,975][03976] Updated weights for policy 0, policy_version 70625 (0.0008) -[2024-07-05 15:33:18,949][03976] Updated weights for policy 0, policy_version 70635 (0.0012) -[2024-07-05 15:33:20,847][03976] Updated weights for policy 0, policy_version 70645 (0.0010) -[2024-07-05 15:33:21,167][03423] Fps is (10 sec: 45056.0, 60 sec: 47377.1, 300 sec: 47485.8). Total num frames: 558727168. Throughput: 0: 11894.8. Samples: 27152448. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:21,168][03423] Avg episode reward: [(0, '54.321')] -[2024-07-05 15:33:22,598][03976] Updated weights for policy 0, policy_version 70655 (0.0008) -[2024-07-05 15:33:24,265][03976] Updated weights for policy 0, policy_version 70665 (0.0008) -[2024-07-05 15:33:25,957][03976] Updated weights for policy 0, policy_version 70675 (0.0008) -[2024-07-05 15:33:26,167][03423] Fps is (10 sec: 45875.5, 60 sec: 47513.6, 300 sec: 47513.6). Total num frames: 558972928. Throughput: 0: 11837.8. Samples: 27221660. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:26,169][03423] Avg episode reward: [(0, '51.532')] -[2024-07-05 15:33:27,831][03976] Updated weights for policy 0, policy_version 70685 (0.0009) -[2024-07-05 15:33:29,541][03976] Updated weights for policy 0, policy_version 70695 (0.0008) -[2024-07-05 15:33:31,167][03423] Fps is (10 sec: 47513.6, 60 sec: 47377.1, 300 sec: 47513.6). Total num frames: 559202304. Throughput: 0: 11800.5. Samples: 27291776. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:33:31,168][03423] Avg episode reward: [(0, '55.684')] -[2024-07-05 15:33:31,186][03976] Updated weights for policy 0, policy_version 70705 (0.0007) -[2024-07-05 15:33:32,969][03976] Updated weights for policy 0, policy_version 70715 (0.0008) -[2024-07-05 15:33:34,656][03976] Updated weights for policy 0, policy_version 70725 (0.0008) -[2024-07-05 15:33:36,167][03423] Fps is (10 sec: 47513.8, 60 sec: 47377.2, 300 sec: 47541.4). Total num frames: 559448064. Throughput: 0: 11808.3. Samples: 27327696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:33:36,168][03423] Avg episode reward: [(0, '55.616')] -[2024-07-05 15:33:36,363][03976] Updated weights for policy 0, policy_version 70735 (0.0008) -[2024-07-05 15:33:38,091][03976] Updated weights for policy 0, policy_version 70745 (0.0008) -[2024-07-05 15:33:39,786][03976] Updated weights for policy 0, policy_version 70755 (0.0009) -[2024-07-05 15:33:41,168][03423] Fps is (10 sec: 48332.5, 60 sec: 47377.1, 300 sec: 47541.4). Total num frames: 559685632. Throughput: 0: 11799.8. Samples: 27399164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:33:41,169][03423] Avg episode reward: [(0, '54.948')] -[2024-07-05 15:33:41,457][03976] Updated weights for policy 0, policy_version 70765 (0.0008) -[2024-07-05 15:33:43,107][03976] Updated weights for policy 0, policy_version 70775 (0.0007) -[2024-07-05 15:33:44,810][03976] Updated weights for policy 0, policy_version 70785 (0.0012) -[2024-07-05 15:33:46,168][03423] Fps is (10 sec: 48331.8, 60 sec: 47513.5, 300 sec: 47541.3). Total num frames: 559931392. Throughput: 0: 11836.3. Samples: 27472456. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:33:46,169][03423] Avg episode reward: [(0, '53.606')] -[2024-07-05 15:33:46,488][03976] Updated weights for policy 0, policy_version 70795 (0.0008) -[2024-07-05 15:33:48,157][03976] Updated weights for policy 0, policy_version 70805 (0.0009) -[2024-07-05 15:33:49,839][03976] Updated weights for policy 0, policy_version 70815 (0.0007) -[2024-07-05 15:33:51,168][03423] Fps is (10 sec: 49152.1, 60 sec: 47650.2, 300 sec: 47569.1). Total num frames: 560177152. Throughput: 0: 11851.6. Samples: 27509144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:33:51,169][03423] Avg episode reward: [(0, '56.558')] -[2024-07-05 15:33:51,487][03976] Updated weights for policy 0, policy_version 70825 (0.0008) -[2024-07-05 15:33:53,176][03976] Updated weights for policy 0, policy_version 70835 (0.0007) -[2024-07-05 15:33:54,872][03976] Updated weights for policy 0, policy_version 70845 (0.0007) -[2024-07-05 15:33:56,168][03423] Fps is (10 sec: 48333.6, 60 sec: 47650.2, 300 sec: 47596.9). Total num frames: 560414720. Throughput: 0: 11888.0. Samples: 27582716. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:33:56,169][03423] Avg episode reward: [(0, '53.361')] -[2024-07-05 15:33:56,558][03976] Updated weights for policy 0, policy_version 70855 (0.0008) -[2024-07-05 15:33:58,337][03976] Updated weights for policy 0, policy_version 70865 (0.0008) -[2024-07-05 15:34:00,056][03976] Updated weights for policy 0, policy_version 70875 (0.0010) -[2024-07-05 15:34:01,167][03423] Fps is (10 sec: 47514.0, 60 sec: 47650.2, 300 sec: 47624.7). Total num frames: 560652288. Throughput: 0: 11883.1. Samples: 27654372. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:01,168][03423] Avg episode reward: [(0, '56.799')] -[2024-07-05 15:34:01,732][03976] Updated weights for policy 0, policy_version 70885 (0.0008) -[2024-07-05 15:34:03,409][03976] Updated weights for policy 0, policy_version 70895 (0.0007) -[2024-07-05 15:34:05,111][03976] Updated weights for policy 0, policy_version 70905 (0.0008) -[2024-07-05 15:34:06,167][03423] Fps is (10 sec: 48332.8, 60 sec: 47650.2, 300 sec: 47652.5). Total num frames: 560898048. Throughput: 0: 11962.1. Samples: 27690744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:06,168][03423] Avg episode reward: [(0, '53.825')] -[2024-07-05 15:34:06,173][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000070911_560898048.pth... -[2024-07-05 15:34:06,238][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000069511_549429248.pth -[2024-07-05 15:34:06,837][03976] Updated weights for policy 0, policy_version 70915 (0.0008) -[2024-07-05 15:34:08,502][03976] Updated weights for policy 0, policy_version 70925 (0.0009) -[2024-07-05 15:34:10,198][03976] Updated weights for policy 0, policy_version 70935 (0.0008) -[2024-07-05 15:34:11,168][03423] Fps is (10 sec: 48332.1, 60 sec: 47650.1, 300 sec: 47680.2). Total num frames: 561135616. Throughput: 0: 12029.8. Samples: 27763004. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:11,169][03423] Avg episode reward: [(0, '57.814')] -[2024-07-05 15:34:11,877][03976] Updated weights for policy 0, policy_version 70945 (0.0007) -[2024-07-05 15:34:13,568][03976] Updated weights for policy 0, policy_version 70955 (0.0008) -[2024-07-05 15:34:15,328][03976] Updated weights for policy 0, policy_version 70965 (0.0010) -[2024-07-05 15:34:16,167][03423] Fps is (10 sec: 47513.7, 60 sec: 47650.2, 300 sec: 47680.2). Total num frames: 561373184. Throughput: 0: 12069.2. Samples: 27834888. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:16,168][03423] Avg episode reward: [(0, '55.814')] -[2024-07-05 15:34:17,005][03976] Updated weights for policy 0, policy_version 70975 (0.0007) -[2024-07-05 15:34:18,673][03976] Updated weights for policy 0, policy_version 70985 (0.0010) -[2024-07-05 15:34:20,398][03976] Updated weights for policy 0, policy_version 70995 (0.0007) -[2024-07-05 15:34:21,167][03423] Fps is (10 sec: 48333.4, 60 sec: 48196.3, 300 sec: 47735.8). Total num frames: 561618944. Throughput: 0: 12088.9. Samples: 27871696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:21,169][03423] Avg episode reward: [(0, '57.007')] -[2024-07-05 15:34:22,112][03976] Updated weights for policy 0, policy_version 71005 (0.0009) -[2024-07-05 15:34:23,802][03976] Updated weights for policy 0, policy_version 71015 (0.0011) -[2024-07-05 15:34:25,494][03976] Updated weights for policy 0, policy_version 71025 (0.0008) -[2024-07-05 15:34:26,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48059.7, 300 sec: 47735.8). Total num frames: 561856512. Throughput: 0: 12106.4. Samples: 27943952. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:26,168][03423] Avg episode reward: [(0, '54.505')] -[2024-07-05 15:34:27,189][03976] Updated weights for policy 0, policy_version 71035 (0.0008) -[2024-07-05 15:34:28,870][03976] Updated weights for policy 0, policy_version 71045 (0.0007) -[2024-07-05 15:34:30,561][03976] Updated weights for policy 0, policy_version 71055 (0.0009) -[2024-07-05 15:34:31,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48332.8, 300 sec: 47763.5). Total num frames: 562102272. Throughput: 0: 12091.1. Samples: 28016552. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:31,168][03423] Avg episode reward: [(0, '52.948')] -[2024-07-05 15:34:32,263][03976] Updated weights for policy 0, policy_version 71065 (0.0009) -[2024-07-05 15:34:33,956][03976] Updated weights for policy 0, policy_version 71075 (0.0013) -[2024-07-05 15:34:35,642][03976] Updated weights for policy 0, policy_version 71085 (0.0014) -[2024-07-05 15:34:36,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48196.2, 300 sec: 47763.5). Total num frames: 562339840. Throughput: 0: 12087.2. Samples: 28053068. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:36,169][03423] Avg episode reward: [(0, '55.590')] -[2024-07-05 15:34:37,335][03976] Updated weights for policy 0, policy_version 71095 (0.0008) -[2024-07-05 15:34:39,128][03976] Updated weights for policy 0, policy_version 71105 (0.0008) -[2024-07-05 15:34:40,918][03976] Updated weights for policy 0, policy_version 71115 (0.0009) -[2024-07-05 15:34:41,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48196.3, 300 sec: 47763.5). Total num frames: 562577408. Throughput: 0: 12021.8. Samples: 28123696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:41,169][03423] Avg episode reward: [(0, '55.365')] -[2024-07-05 15:34:42,556][03976] Updated weights for policy 0, policy_version 71125 (0.0007) -[2024-07-05 15:34:44,245][03976] Updated weights for policy 0, policy_version 71135 (0.0008) -[2024-07-05 15:34:45,943][03976] Updated weights for policy 0, policy_version 71145 (0.0008) -[2024-07-05 15:34:46,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.4, 300 sec: 47791.3). Total num frames: 562823168. Throughput: 0: 12050.4. Samples: 28196640. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:46,169][03423] Avg episode reward: [(0, '57.498')] -[2024-07-05 15:34:47,715][03976] Updated weights for policy 0, policy_version 71155 (0.0008) -[2024-07-05 15:34:49,365][03976] Updated weights for policy 0, policy_version 71165 (0.0008) -[2024-07-05 15:34:51,068][03976] Updated weights for policy 0, policy_version 71175 (0.0008) -[2024-07-05 15:34:51,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48059.8, 300 sec: 47791.3). Total num frames: 563060736. Throughput: 0: 12039.6. Samples: 28232524. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:51,169][03423] Avg episode reward: [(0, '54.979')] -[2024-07-05 15:34:52,741][03976] Updated weights for policy 0, policy_version 71185 (0.0007) -[2024-07-05 15:34:54,405][03976] Updated weights for policy 0, policy_version 71195 (0.0008) -[2024-07-05 15:34:56,158][03976] Updated weights for policy 0, policy_version 71205 (0.0009) -[2024-07-05 15:34:56,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48196.1, 300 sec: 47846.8). Total num frames: 563306496. Throughput: 0: 12045.8. Samples: 28305064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:34:56,169][03423] Avg episode reward: [(0, '56.899')] -[2024-07-05 15:34:57,843][03976] Updated weights for policy 0, policy_version 71215 (0.0008) -[2024-07-05 15:34:59,589][03976] Updated weights for policy 0, policy_version 71225 (0.0008) -[2024-07-05 15:35:01,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 47874.6). Total num frames: 563544064. Throughput: 0: 12021.8. Samples: 28375872. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:01,169][03423] Avg episode reward: [(0, '57.198')] -[2024-07-05 15:35:01,291][03976] Updated weights for policy 0, policy_version 71235 (0.0009) -[2024-07-05 15:35:03,011][03976] Updated weights for policy 0, policy_version 71245 (0.0007) -[2024-07-05 15:35:04,709][03976] Updated weights for policy 0, policy_version 71255 (0.0009) -[2024-07-05 15:35:06,167][03423] Fps is (10 sec: 47514.5, 60 sec: 48059.7, 300 sec: 47874.6). Total num frames: 563781632. Throughput: 0: 12011.4. Samples: 28412208. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:06,168][03423] Avg episode reward: [(0, '55.400')] -[2024-07-05 15:35:06,411][03976] Updated weights for policy 0, policy_version 71265 (0.0008) -[2024-07-05 15:35:08,261][03976] Updated weights for policy 0, policy_version 71275 (0.0009) -[2024-07-05 15:35:09,976][03976] Updated weights for policy 0, policy_version 71285 (0.0010) -[2024-07-05 15:35:11,167][03423] Fps is (10 sec: 47513.9, 60 sec: 48059.8, 300 sec: 47846.8). Total num frames: 564019200. Throughput: 0: 11973.6. Samples: 28482764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:11,168][03423] Avg episode reward: [(0, '57.843')] -[2024-07-05 15:35:11,647][03976] Updated weights for policy 0, policy_version 71295 (0.0007) -[2024-07-05 15:35:13,358][03976] Updated weights for policy 0, policy_version 71305 (0.0008) -[2024-07-05 15:35:15,058][03976] Updated weights for policy 0, policy_version 71315 (0.0007) -[2024-07-05 15:35:16,168][03423] Fps is (10 sec: 47512.9, 60 sec: 48059.6, 300 sec: 47846.8). Total num frames: 564256768. Throughput: 0: 11956.8. Samples: 28554608. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:16,169][03423] Avg episode reward: [(0, '54.354')] -[2024-07-05 15:35:16,756][03976] Updated weights for policy 0, policy_version 71325 (0.0010) -[2024-07-05 15:35:18,486][03976] Updated weights for policy 0, policy_version 71335 (0.0008) -[2024-07-05 15:35:20,155][03976] Updated weights for policy 0, policy_version 71345 (0.0008) -[2024-07-05 15:35:21,168][03423] Fps is (10 sec: 47513.4, 60 sec: 47923.1, 300 sec: 47846.8). Total num frames: 564494336. Throughput: 0: 11957.0. Samples: 28591132. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:21,169][03423] Avg episode reward: [(0, '54.686')] -[2024-07-05 15:35:21,854][03976] Updated weights for policy 0, policy_version 71355 (0.0010) -[2024-07-05 15:35:23,561][03976] Updated weights for policy 0, policy_version 71365 (0.0007) -[2024-07-05 15:35:25,190][03976] Updated weights for policy 0, policy_version 71375 (0.0010) -[2024-07-05 15:35:26,167][03423] Fps is (10 sec: 48333.8, 60 sec: 48059.8, 300 sec: 47874.6). Total num frames: 564740096. Throughput: 0: 12002.2. Samples: 28663792. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:26,169][03423] Avg episode reward: [(0, '56.864')] -[2024-07-05 15:35:26,867][03976] Updated weights for policy 0, policy_version 71385 (0.0008) -[2024-07-05 15:35:28,513][03976] Updated weights for policy 0, policy_version 71395 (0.0010) -[2024-07-05 15:35:30,235][03976] Updated weights for policy 0, policy_version 71405 (0.0007) -[2024-07-05 15:35:31,168][03423] Fps is (10 sec: 49152.1, 60 sec: 48059.8, 300 sec: 47902.4). Total num frames: 564985856. Throughput: 0: 12023.2. Samples: 28737684. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:31,169][03423] Avg episode reward: [(0, '55.798')] -[2024-07-05 15:35:31,914][03976] Updated weights for policy 0, policy_version 71415 (0.0009) -[2024-07-05 15:35:33,585][03976] Updated weights for policy 0, policy_version 71425 (0.0007) -[2024-07-05 15:35:35,253][03976] Updated weights for policy 0, policy_version 71435 (0.0008) -[2024-07-05 15:35:36,167][03423] Fps is (10 sec: 49151.6, 60 sec: 48196.3, 300 sec: 47930.2). Total num frames: 565231616. Throughput: 0: 12030.8. Samples: 28773912. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:36,168][03423] Avg episode reward: [(0, '54.055')] -[2024-07-05 15:35:37,039][03976] Updated weights for policy 0, policy_version 71445 (0.0008) -[2024-07-05 15:35:38,737][03976] Updated weights for policy 0, policy_version 71455 (0.0011) -[2024-07-05 15:35:40,450][03976] Updated weights for policy 0, policy_version 71465 (0.0011) -[2024-07-05 15:35:41,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 47902.4). Total num frames: 565469184. Throughput: 0: 12022.4. Samples: 28846072. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) -[2024-07-05 15:35:41,169][03423] Avg episode reward: [(0, '55.582')] -[2024-07-05 15:35:42,197][03976] Updated weights for policy 0, policy_version 71475 (0.0008) -[2024-07-05 15:35:43,887][03976] Updated weights for policy 0, policy_version 71485 (0.0008) -[2024-07-05 15:35:45,597][03976] Updated weights for policy 0, policy_version 71495 (0.0008) -[2024-07-05 15:35:46,168][03423] Fps is (10 sec: 47513.4, 60 sec: 48059.7, 300 sec: 47902.4). Total num frames: 565706752. Throughput: 0: 12026.5. Samples: 28917064. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:35:46,169][03423] Avg episode reward: [(0, '54.824')] -[2024-07-05 15:35:47,309][03976] Updated weights for policy 0, policy_version 71505 (0.0013) -[2024-07-05 15:35:49,033][03976] Updated weights for policy 0, policy_version 71515 (0.0008) -[2024-07-05 15:35:50,742][03976] Updated weights for policy 0, policy_version 71525 (0.0009) -[2024-07-05 15:35:51,167][03423] Fps is (10 sec: 47513.8, 60 sec: 48059.7, 300 sec: 47902.4). Total num frames: 565944320. Throughput: 0: 12020.4. Samples: 28953124. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:35:51,168][03423] Avg episode reward: [(0, '54.377')] -[2024-07-05 15:35:52,448][03976] Updated weights for policy 0, policy_version 71535 (0.0008) -[2024-07-05 15:35:54,111][03976] Updated weights for policy 0, policy_version 71545 (0.0008) -[2024-07-05 15:35:55,840][03976] Updated weights for policy 0, policy_version 71555 (0.0008) -[2024-07-05 15:35:56,167][03423] Fps is (10 sec: 48333.1, 60 sec: 48059.9, 300 sec: 47930.1). Total num frames: 566190080. Throughput: 0: 12066.9. Samples: 29025776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:35:56,168][03423] Avg episode reward: [(0, '54.436')] -[2024-07-05 15:35:57,466][03976] Updated weights for policy 0, policy_version 71565 (0.0007) -[2024-07-05 15:35:59,112][03976] Updated weights for policy 0, policy_version 71575 (0.0010) -[2024-07-05 15:36:00,847][03976] Updated weights for policy 0, policy_version 71585 (0.0008) -[2024-07-05 15:36:01,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48059.7, 300 sec: 47902.4). Total num frames: 566427648. Throughput: 0: 12086.7. Samples: 29098508. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:01,169][03423] Avg episode reward: [(0, '56.687')] -[2024-07-05 15:36:02,507][03976] Updated weights for policy 0, policy_version 71595 (0.0009) -[2024-07-05 15:36:04,187][03976] Updated weights for policy 0, policy_version 71605 (0.0007) -[2024-07-05 15:36:05,857][03976] Updated weights for policy 0, policy_version 71615 (0.0007) -[2024-07-05 15:36:06,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48196.3, 300 sec: 47957.9). Total num frames: 566673408. Throughput: 0: 12087.1. Samples: 29135052. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:06,168][03423] Avg episode reward: [(0, '52.671')] -[2024-07-05 15:36:06,210][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000071617_566681600.pth... -[2024-07-05 15:36:06,275][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000070211_555163648.pth -[2024-07-05 15:36:07,608][03976] Updated weights for policy 0, policy_version 71625 (0.0010) -[2024-07-05 15:36:09,266][03976] Updated weights for policy 0, policy_version 71635 (0.0007) -[2024-07-05 15:36:10,964][03976] Updated weights for policy 0, policy_version 71645 (0.0010) -[2024-07-05 15:36:11,171][03423] Fps is (10 sec: 49134.1, 60 sec: 48329.8, 300 sec: 47957.3). Total num frames: 566919168. Throughput: 0: 12090.7. Samples: 29207920. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:11,172][03423] Avg episode reward: [(0, '54.841')] -[2024-07-05 15:36:12,652][03976] Updated weights for policy 0, policy_version 71655 (0.0010) -[2024-07-05 15:36:14,357][03976] Updated weights for policy 0, policy_version 71665 (0.0007) -[2024-07-05 15:36:16,046][03976] Updated weights for policy 0, policy_version 71675 (0.0007) -[2024-07-05 15:36:16,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48332.9, 300 sec: 47957.9). Total num frames: 567156736. Throughput: 0: 12060.8. Samples: 29280420. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:16,168][03423] Avg episode reward: [(0, '55.416')] -[2024-07-05 15:36:17,726][03976] Updated weights for policy 0, policy_version 71685 (0.0007) -[2024-07-05 15:36:19,415][03976] Updated weights for policy 0, policy_version 71695 (0.0007) -[2024-07-05 15:36:21,071][03976] Updated weights for policy 0, policy_version 71705 (0.0007) -[2024-07-05 15:36:21,168][03423] Fps is (10 sec: 48350.3, 60 sec: 48469.3, 300 sec: 47985.7). Total num frames: 567402496. Throughput: 0: 12066.0. Samples: 29316884. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:21,169][03423] Avg episode reward: [(0, '50.943')] -[2024-07-05 15:36:22,717][03976] Updated weights for policy 0, policy_version 71715 (0.0008) -[2024-07-05 15:36:24,371][03976] Updated weights for policy 0, policy_version 71725 (0.0008) -[2024-07-05 15:36:26,113][03976] Updated weights for policy 0, policy_version 71735 (0.0007) -[2024-07-05 15:36:26,168][03423] Fps is (10 sec: 49151.7, 60 sec: 48469.2, 300 sec: 47985.7). Total num frames: 567648256. Throughput: 0: 12101.1. Samples: 29390624. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:26,168][03423] Avg episode reward: [(0, '53.337')] -[2024-07-05 15:36:27,867][03976] Updated weights for policy 0, policy_version 71745 (0.0007) -[2024-07-05 15:36:29,553][03976] Updated weights for policy 0, policy_version 71755 (0.0008) -[2024-07-05 15:36:31,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 47985.7). Total num frames: 567885824. Throughput: 0: 12121.2. Samples: 29462520. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:31,168][03423] Avg episode reward: [(0, '54.935')] -[2024-07-05 15:36:31,213][03976] Updated weights for policy 0, policy_version 71765 (0.0010) -[2024-07-05 15:36:32,866][03976] Updated weights for policy 0, policy_version 71775 (0.0008) -[2024-07-05 15:36:34,543][03976] Updated weights for policy 0, policy_version 71785 (0.0007) -[2024-07-05 15:36:36,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 47985.7). Total num frames: 568131584. Throughput: 0: 12142.5. Samples: 29499536. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:36,169][03423] Avg episode reward: [(0, '54.192')] -[2024-07-05 15:36:36,222][03976] Updated weights for policy 0, policy_version 71795 (0.0008) -[2024-07-05 15:36:37,892][03976] Updated weights for policy 0, policy_version 71805 (0.0011) -[2024-07-05 15:36:39,613][03976] Updated weights for policy 0, policy_version 71815 (0.0008) -[2024-07-05 15:36:41,168][03423] Fps is (10 sec: 49151.7, 60 sec: 48469.3, 300 sec: 48013.4). Total num frames: 568377344. Throughput: 0: 12144.9. Samples: 29572296. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:41,169][03423] Avg episode reward: [(0, '55.498')] -[2024-07-05 15:36:41,257][03976] Updated weights for policy 0, policy_version 71825 (0.0009) -[2024-07-05 15:36:42,968][03976] Updated weights for policy 0, policy_version 71835 (0.0008) -[2024-07-05 15:36:44,672][03976] Updated weights for policy 0, policy_version 71845 (0.0011) -[2024-07-05 15:36:46,168][03423] Fps is (10 sec: 49151.4, 60 sec: 48605.8, 300 sec: 48041.2). Total num frames: 568623104. Throughput: 0: 12148.9. Samples: 29645208. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:46,169][03423] Avg episode reward: [(0, '55.229')] -[2024-07-05 15:36:46,334][03976] Updated weights for policy 0, policy_version 71855 (0.0007) -[2024-07-05 15:36:48,021][03976] Updated weights for policy 0, policy_version 71865 (0.0007) -[2024-07-05 15:36:49,683][03976] Updated weights for policy 0, policy_version 71875 (0.0007) -[2024-07-05 15:36:51,168][03423] Fps is (10 sec: 49152.4, 60 sec: 48742.4, 300 sec: 48069.0). Total num frames: 568868864. Throughput: 0: 12158.8. Samples: 29682200. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:51,168][03423] Avg episode reward: [(0, '56.058')] -[2024-07-05 15:36:51,322][03976] Updated weights for policy 0, policy_version 71885 (0.0010) -[2024-07-05 15:36:53,021][03976] Updated weights for policy 0, policy_version 71895 (0.0007) -[2024-07-05 15:36:54,684][03976] Updated weights for policy 0, policy_version 71905 (0.0008) -[2024-07-05 15:36:56,168][03423] Fps is (10 sec: 48333.1, 60 sec: 48605.8, 300 sec: 48069.0). Total num frames: 569106432. Throughput: 0: 12183.9. Samples: 29756152. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:36:56,169][03423] Avg episode reward: [(0, '55.063')] -[2024-07-05 15:36:56,382][03976] Updated weights for policy 0, policy_version 71915 (0.0007) -[2024-07-05 15:36:58,066][03976] Updated weights for policy 0, policy_version 71925 (0.0008) -[2024-07-05 15:36:59,741][03976] Updated weights for policy 0, policy_version 71935 (0.0013) -[2024-07-05 15:37:01,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48742.4, 300 sec: 48096.7). Total num frames: 569352192. Throughput: 0: 12183.2. Samples: 29828664. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:01,169][03423] Avg episode reward: [(0, '54.931')] -[2024-07-05 15:37:01,464][03976] Updated weights for policy 0, policy_version 71945 (0.0008) -[2024-07-05 15:37:03,159][03976] Updated weights for policy 0, policy_version 71955 (0.0010) -[2024-07-05 15:37:04,861][03976] Updated weights for policy 0, policy_version 71965 (0.0008) -[2024-07-05 15:37:06,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48605.9, 300 sec: 48096.8). Total num frames: 569589760. Throughput: 0: 12168.6. Samples: 29864468. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:06,168][03423] Avg episode reward: [(0, '54.630')] -[2024-07-05 15:37:06,508][03976] Updated weights for policy 0, policy_version 71975 (0.0009) -[2024-07-05 15:37:08,148][03976] Updated weights for policy 0, policy_version 71985 (0.0008) -[2024-07-05 15:37:09,841][03976] Updated weights for policy 0, policy_version 71995 (0.0008) -[2024-07-05 15:37:11,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48608.7, 300 sec: 48096.7). Total num frames: 569835520. Throughput: 0: 12165.7. Samples: 29938084. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:11,171][03423] Avg episode reward: [(0, '55.702')] -[2024-07-05 15:37:11,563][03976] Updated weights for policy 0, policy_version 72005 (0.0008) -[2024-07-05 15:37:13,296][03976] Updated weights for policy 0, policy_version 72015 (0.0008) -[2024-07-05 15:37:14,970][03976] Updated weights for policy 0, policy_version 72025 (0.0007) -[2024-07-05 15:37:16,167][03423] Fps is (10 sec: 49152.0, 60 sec: 48742.4, 300 sec: 48124.5). Total num frames: 570081280. Throughput: 0: 12173.4. Samples: 30010324. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:16,168][03423] Avg episode reward: [(0, '55.209')] -[2024-07-05 15:37:16,649][03976] Updated weights for policy 0, policy_version 72035 (0.0010) -[2024-07-05 15:37:18,309][03976] Updated weights for policy 0, policy_version 72045 (0.0008) -[2024-07-05 15:37:20,055][03976] Updated weights for policy 0, policy_version 72055 (0.0010) -[2024-07-05 15:37:21,168][03423] Fps is (10 sec: 48333.7, 60 sec: 48605.9, 300 sec: 48124.5). Total num frames: 570318848. Throughput: 0: 12172.4. Samples: 30047296. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:21,169][03423] Avg episode reward: [(0, '56.427')] -[2024-07-05 15:37:21,691][03976] Updated weights for policy 0, policy_version 72065 (0.0011) -[2024-07-05 15:37:23,392][03976] Updated weights for policy 0, policy_version 72075 (0.0008) -[2024-07-05 15:37:25,084][03976] Updated weights for policy 0, policy_version 72085 (0.0007) -[2024-07-05 15:37:26,168][03423] Fps is (10 sec: 48331.6, 60 sec: 48605.7, 300 sec: 48152.3). Total num frames: 570564608. Throughput: 0: 12166.1. Samples: 30119772. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:26,169][03423] Avg episode reward: [(0, '56.282')] -[2024-07-05 15:37:26,750][03976] Updated weights for policy 0, policy_version 72095 (0.0008) -[2024-07-05 15:37:28,444][03976] Updated weights for policy 0, policy_version 72105 (0.0008) -[2024-07-05 15:37:30,101][03976] Updated weights for policy 0, policy_version 72115 (0.0008) -[2024-07-05 15:37:31,168][03423] Fps is (10 sec: 49152.2, 60 sec: 48742.4, 300 sec: 48152.3). Total num frames: 570810368. Throughput: 0: 12176.9. Samples: 30193168. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:31,168][03423] Avg episode reward: [(0, '56.158')] -[2024-07-05 15:37:31,764][03976] Updated weights for policy 0, policy_version 72125 (0.0009) -[2024-07-05 15:37:33,469][03976] Updated weights for policy 0, policy_version 72135 (0.0011) -[2024-07-05 15:37:35,163][03976] Updated weights for policy 0, policy_version 72145 (0.0009) -[2024-07-05 15:37:36,167][03423] Fps is (10 sec: 49153.0, 60 sec: 48742.4, 300 sec: 48180.1). Total num frames: 571056128. Throughput: 0: 12163.2. Samples: 30229544. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:36,169][03423] Avg episode reward: [(0, '56.070')] -[2024-07-05 15:37:36,833][03976] Updated weights for policy 0, policy_version 72155 (0.0008) -[2024-07-05 15:37:38,433][03976] Updated weights for policy 0, policy_version 72165 (0.0008) -[2024-07-05 15:37:40,144][03976] Updated weights for policy 0, policy_version 72175 (0.0008) -[2024-07-05 15:37:41,167][03423] Fps is (10 sec: 49152.3, 60 sec: 48742.5, 300 sec: 48207.9). Total num frames: 571301888. Throughput: 0: 12157.6. Samples: 30303244. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:41,168][03423] Avg episode reward: [(0, '53.388')] -[2024-07-05 15:37:41,813][03976] Updated weights for policy 0, policy_version 72185 (0.0009) -[2024-07-05 15:37:43,492][03976] Updated weights for policy 0, policy_version 72195 (0.0007) -[2024-07-05 15:37:45,146][03976] Updated weights for policy 0, policy_version 72205 (0.0010) -[2024-07-05 15:37:46,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48606.0, 300 sec: 48207.9). Total num frames: 571539456. Throughput: 0: 12182.2. Samples: 30376860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:46,168][03423] Avg episode reward: [(0, '57.079')] -[2024-07-05 15:37:46,833][03976] Updated weights for policy 0, policy_version 72215 (0.0007) -[2024-07-05 15:37:48,488][03976] Updated weights for policy 0, policy_version 72225 (0.0008) -[2024-07-05 15:37:50,204][03976] Updated weights for policy 0, policy_version 72235 (0.0008) -[2024-07-05 15:37:51,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48605.9, 300 sec: 48235.6). Total num frames: 571785216. Throughput: 0: 12203.7. Samples: 30413636. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:37:51,168][03423] Avg episode reward: [(0, '56.034')] -[2024-07-05 15:37:51,957][03976] Updated weights for policy 0, policy_version 72245 (0.0012) -[2024-07-05 15:37:53,644][03976] Updated weights for policy 0, policy_version 72255 (0.0008) -[2024-07-05 15:37:55,326][03976] Updated weights for policy 0, policy_version 72265 (0.0008) -[2024-07-05 15:37:56,168][03423] Fps is (10 sec: 48332.1, 60 sec: 48605.8, 300 sec: 48235.6). Total num frames: 572022784. Throughput: 0: 12152.7. Samples: 30484956. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:37:56,169][03423] Avg episode reward: [(0, '53.974')] -[2024-07-05 15:37:56,976][03976] Updated weights for policy 0, policy_version 72275 (0.0007) -[2024-07-05 15:37:58,700][03976] Updated weights for policy 0, policy_version 72285 (0.0008) -[2024-07-05 15:38:00,370][03976] Updated weights for policy 0, policy_version 72295 (0.0008) -[2024-07-05 15:38:01,168][03423] Fps is (10 sec: 48332.2, 60 sec: 48605.8, 300 sec: 48235.6). Total num frames: 572268544. Throughput: 0: 12176.6. Samples: 30558272. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:01,168][03423] Avg episode reward: [(0, '57.297')] -[2024-07-05 15:38:02,073][03976] Updated weights for policy 0, policy_version 72305 (0.0007) -[2024-07-05 15:38:03,785][03976] Updated weights for policy 0, policy_version 72315 (0.0008) -[2024-07-05 15:38:05,484][03976] Updated weights for policy 0, policy_version 72325 (0.0008) -[2024-07-05 15:38:06,168][03423] Fps is (10 sec: 48333.3, 60 sec: 48605.8, 300 sec: 48235.6). Total num frames: 572506112. Throughput: 0: 12148.4. Samples: 30593976. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:06,168][03423] Avg episode reward: [(0, '54.140')] -[2024-07-05 15:38:06,202][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000072329_572514304.pth... -[2024-07-05 15:38:06,267][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000070911_560898048.pth -[2024-07-05 15:38:07,219][03976] Updated weights for policy 0, policy_version 72335 (0.0010) -[2024-07-05 15:38:08,902][03976] Updated weights for policy 0, policy_version 72345 (0.0008) -[2024-07-05 15:38:10,570][03976] Updated weights for policy 0, policy_version 72355 (0.0008) -[2024-07-05 15:38:11,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48605.9, 300 sec: 48263.4). Total num frames: 572751872. Throughput: 0: 12150.2. Samples: 30666532. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:11,169][03423] Avg episode reward: [(0, '56.498')] -[2024-07-05 15:38:12,227][03976] Updated weights for policy 0, policy_version 72365 (0.0009) -[2024-07-05 15:38:13,927][03976] Updated weights for policy 0, policy_version 72375 (0.0008) -[2024-07-05 15:38:15,688][03976] Updated weights for policy 0, policy_version 72385 (0.0012) -[2024-07-05 15:38:16,167][03423] Fps is (10 sec: 49152.2, 60 sec: 48605.8, 300 sec: 48374.5). Total num frames: 572997632. Throughput: 0: 12135.7. Samples: 30739276. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:16,168][03423] Avg episode reward: [(0, '55.477')] -[2024-07-05 15:38:17,318][03976] Updated weights for policy 0, policy_version 72395 (0.0007) -[2024-07-05 15:38:19,039][03976] Updated weights for policy 0, policy_version 72405 (0.0011) -[2024-07-05 15:38:20,776][03976] Updated weights for policy 0, policy_version 72415 (0.0008) -[2024-07-05 15:38:21,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48605.8, 300 sec: 48346.7). Total num frames: 573235200. Throughput: 0: 12139.3. Samples: 30775812. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:21,169][03423] Avg episode reward: [(0, '55.765')] -[2024-07-05 15:38:22,442][03976] Updated weights for policy 0, policy_version 72425 (0.0007) -[2024-07-05 15:38:24,191][03976] Updated weights for policy 0, policy_version 72435 (0.0008) -[2024-07-05 15:38:25,857][03976] Updated weights for policy 0, policy_version 72445 (0.0008) -[2024-07-05 15:38:26,167][03423] Fps is (10 sec: 47513.7, 60 sec: 48469.5, 300 sec: 48374.5). Total num frames: 573472768. Throughput: 0: 12098.4. Samples: 30847672. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:26,168][03423] Avg episode reward: [(0, '54.485')] -[2024-07-05 15:38:27,537][03976] Updated weights for policy 0, policy_version 72455 (0.0007) -[2024-07-05 15:38:29,241][03976] Updated weights for policy 0, policy_version 72465 (0.0008) -[2024-07-05 15:38:31,013][03976] Updated weights for policy 0, policy_version 72475 (0.0010) -[2024-07-05 15:38:31,167][03423] Fps is (10 sec: 48333.4, 60 sec: 48469.4, 300 sec: 48374.4). Total num frames: 573718528. Throughput: 0: 12065.0. Samples: 30919784. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:31,169][03423] Avg episode reward: [(0, '56.021')] -[2024-07-05 15:38:32,749][03976] Updated weights for policy 0, policy_version 72485 (0.0008) -[2024-07-05 15:38:34,440][03976] Updated weights for policy 0, policy_version 72495 (0.0008) -[2024-07-05 15:38:36,117][03976] Updated weights for policy 0, policy_version 72505 (0.0008) -[2024-07-05 15:38:36,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48374.5). Total num frames: 573956096. Throughput: 0: 12033.8. Samples: 30955156. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:36,169][03423] Avg episode reward: [(0, '54.420')] -[2024-07-05 15:38:37,805][03976] Updated weights for policy 0, policy_version 72515 (0.0008) -[2024-07-05 15:38:39,485][03976] Updated weights for policy 0, policy_version 72525 (0.0007) -[2024-07-05 15:38:41,153][03976] Updated weights for policy 0, policy_version 72535 (0.0007) -[2024-07-05 15:38:41,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48332.7, 300 sec: 48374.5). Total num frames: 574201856. Throughput: 0: 12066.9. Samples: 31027964. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:41,168][03423] Avg episode reward: [(0, '55.227')] -[2024-07-05 15:38:42,820][03976] Updated weights for policy 0, policy_version 72545 (0.0008) -[2024-07-05 15:38:44,505][03976] Updated weights for policy 0, policy_version 72555 (0.0008) -[2024-07-05 15:38:46,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48346.7). Total num frames: 574439424. Throughput: 0: 12062.3. Samples: 31101076. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:46,169][03423] Avg episode reward: [(0, '54.471')] -[2024-07-05 15:38:46,227][03976] Updated weights for policy 0, policy_version 72565 (0.0010) -[2024-07-05 15:38:47,873][03976] Updated weights for policy 0, policy_version 72575 (0.0007) -[2024-07-05 15:38:49,531][03976] Updated weights for policy 0, policy_version 72585 (0.0008) -[2024-07-05 15:38:51,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48374.5). Total num frames: 574685184. Throughput: 0: 12090.6. Samples: 31138052. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:51,168][03423] Avg episode reward: [(0, '53.569')] -[2024-07-05 15:38:51,184][03976] Updated weights for policy 0, policy_version 72595 (0.0010) -[2024-07-05 15:38:52,830][03976] Updated weights for policy 0, policy_version 72605 (0.0010) -[2024-07-05 15:38:54,481][03976] Updated weights for policy 0, policy_version 72615 (0.0009) -[2024-07-05 15:38:56,160][03976] Updated weights for policy 0, policy_version 72625 (0.0007) -[2024-07-05 15:38:56,167][03423] Fps is (10 sec: 49971.5, 60 sec: 48606.0, 300 sec: 48430.0). Total num frames: 574939136. Throughput: 0: 12123.2. Samples: 31212076. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:38:56,169][03423] Avg episode reward: [(0, '54.826')] -[2024-07-05 15:38:57,860][03976] Updated weights for policy 0, policy_version 72635 (0.0008) -[2024-07-05 15:38:59,503][03976] Updated weights for policy 0, policy_version 72645 (0.0007) -[2024-07-05 15:39:01,168][03423] Fps is (10 sec: 49152.0, 60 sec: 48469.4, 300 sec: 48402.2). Total num frames: 575176704. Throughput: 0: 12145.2. Samples: 31285808. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:39:01,168][03423] Avg episode reward: [(0, '54.825')] -[2024-07-05 15:39:01,181][03976] Updated weights for policy 0, policy_version 72655 (0.0011) -[2024-07-05 15:39:02,916][03976] Updated weights for policy 0, policy_version 72665 (0.0010) -[2024-07-05 15:39:04,569][03976] Updated weights for policy 0, policy_version 72675 (0.0007) -[2024-07-05 15:39:06,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48605.8, 300 sec: 48430.0). Total num frames: 575422464. Throughput: 0: 12135.3. Samples: 31321900. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:39:06,169][03423] Avg episode reward: [(0, '54.929')] -[2024-07-05 15:39:06,236][03976] Updated weights for policy 0, policy_version 72685 (0.0011) -[2024-07-05 15:39:07,945][03976] Updated weights for policy 0, policy_version 72695 (0.0008) -[2024-07-05 15:39:09,629][03976] Updated weights for policy 0, policy_version 72705 (0.0007) -[2024-07-05 15:39:11,168][03423] Fps is (10 sec: 49152.0, 60 sec: 48606.0, 300 sec: 48457.8). Total num frames: 575668224. Throughput: 0: 12163.1. Samples: 31395012. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:39:11,169][03423] Avg episode reward: [(0, '54.557')] -[2024-07-05 15:39:11,272][03976] Updated weights for policy 0, policy_version 72715 (0.0007) -[2024-07-05 15:39:12,979][03976] Updated weights for policy 0, policy_version 72725 (0.0008) -[2024-07-05 15:39:14,700][03976] Updated weights for policy 0, policy_version 72735 (0.0008) -[2024-07-05 15:39:16,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48469.2, 300 sec: 48430.0). Total num frames: 575905792. Throughput: 0: 12184.1. Samples: 31468072. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:39:16,169][03423] Avg episode reward: [(0, '54.526')] -[2024-07-05 15:39:16,326][03976] Updated weights for policy 0, policy_version 72745 (0.0007) -[2024-07-05 15:39:18,029][03976] Updated weights for policy 0, policy_version 72755 (0.0008) -[2024-07-05 15:39:19,745][03976] Updated weights for policy 0, policy_version 72765 (0.0007) -[2024-07-05 15:39:21,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48606.0, 300 sec: 48457.8). Total num frames: 576151552. Throughput: 0: 12195.4. Samples: 31503948. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:39:21,168][03423] Avg episode reward: [(0, '52.965')] -[2024-07-05 15:39:21,432][03976] Updated weights for policy 0, policy_version 72775 (0.0007) -[2024-07-05 15:39:23,124][03976] Updated weights for policy 0, policy_version 72785 (0.0007) -[2024-07-05 15:39:24,844][03976] Updated weights for policy 0, policy_version 72795 (0.0008) -[2024-07-05 15:39:26,167][03423] Fps is (10 sec: 48333.7, 60 sec: 48605.9, 300 sec: 48430.0). Total num frames: 576389120. Throughput: 0: 12188.9. Samples: 31576464. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:26,168][03423] Avg episode reward: [(0, '56.055')] -[2024-07-05 15:39:26,554][03976] Updated weights for policy 0, policy_version 72805 (0.0007) -[2024-07-05 15:39:28,244][03976] Updated weights for policy 0, policy_version 72815 (0.0009) -[2024-07-05 15:39:29,960][03976] Updated weights for policy 0, policy_version 72825 (0.0008) -[2024-07-05 15:39:31,169][03423] Fps is (10 sec: 48323.4, 60 sec: 48604.3, 300 sec: 48457.5). Total num frames: 576634880. Throughput: 0: 12170.3. Samples: 31648764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:31,171][03423] Avg episode reward: [(0, '54.151')] -[2024-07-05 15:39:31,643][03976] Updated weights for policy 0, policy_version 72835 (0.0007) -[2024-07-05 15:39:33,341][03976] Updated weights for policy 0, policy_version 72845 (0.0007) -[2024-07-05 15:39:35,108][03976] Updated weights for policy 0, policy_version 72855 (0.0008) -[2024-07-05 15:39:36,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48605.8, 300 sec: 48457.8). Total num frames: 576872448. Throughput: 0: 12163.0. Samples: 31685388. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:36,169][03423] Avg episode reward: [(0, '54.041')] -[2024-07-05 15:39:36,813][03976] Updated weights for policy 0, policy_version 72865 (0.0007) -[2024-07-05 15:39:38,546][03976] Updated weights for policy 0, policy_version 72875 (0.0008) -[2024-07-05 15:39:40,266][03976] Updated weights for policy 0, policy_version 72885 (0.0008) -[2024-07-05 15:39:41,167][03423] Fps is (10 sec: 47522.8, 60 sec: 48469.3, 300 sec: 48430.0). Total num frames: 577110016. Throughput: 0: 12094.0. Samples: 31756304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:41,168][03423] Avg episode reward: [(0, '55.800')] -[2024-07-05 15:39:41,972][03976] Updated weights for policy 0, policy_version 72895 (0.0008) -[2024-07-05 15:39:43,669][03976] Updated weights for policy 0, policy_version 72905 (0.0010) -[2024-07-05 15:39:45,357][03976] Updated weights for policy 0, policy_version 72915 (0.0010) -[2024-07-05 15:39:46,168][03423] Fps is (10 sec: 47513.3, 60 sec: 48469.3, 300 sec: 48430.0). Total num frames: 577347584. Throughput: 0: 12055.8. Samples: 31828320. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:46,169][03423] Avg episode reward: [(0, '52.945')] -[2024-07-05 15:39:47,023][03976] Updated weights for policy 0, policy_version 72925 (0.0007) -[2024-07-05 15:39:48,720][03976] Updated weights for policy 0, policy_version 72935 (0.0010) -[2024-07-05 15:39:50,391][03976] Updated weights for policy 0, policy_version 72945 (0.0008) -[2024-07-05 15:39:51,168][03423] Fps is (10 sec: 48331.4, 60 sec: 48469.1, 300 sec: 48430.0). Total num frames: 577593344. Throughput: 0: 12065.5. Samples: 31864848. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:51,169][03423] Avg episode reward: [(0, '56.108')] -[2024-07-05 15:39:52,068][03976] Updated weights for policy 0, policy_version 72955 (0.0008) -[2024-07-05 15:39:53,772][03976] Updated weights for policy 0, policy_version 72965 (0.0010) -[2024-07-05 15:39:55,439][03976] Updated weights for policy 0, policy_version 72975 (0.0007) -[2024-07-05 15:39:56,168][03423] Fps is (10 sec: 49151.6, 60 sec: 48332.6, 300 sec: 48457.7). Total num frames: 577839104. Throughput: 0: 12073.1. Samples: 31938304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:39:56,169][03423] Avg episode reward: [(0, '53.253')] -[2024-07-05 15:39:57,111][03976] Updated weights for policy 0, policy_version 72985 (0.0008) -[2024-07-05 15:39:58,825][03976] Updated weights for policy 0, policy_version 72995 (0.0007) -[2024-07-05 15:40:00,497][03976] Updated weights for policy 0, policy_version 73005 (0.0008) -[2024-07-05 15:40:01,167][03423] Fps is (10 sec: 48334.1, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 578076672. Throughput: 0: 12067.6. Samples: 32011112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:40:01,168][03423] Avg episode reward: [(0, '55.606')] -[2024-07-05 15:40:02,183][03976] Updated weights for policy 0, policy_version 73015 (0.0010) -[2024-07-05 15:40:03,845][03976] Updated weights for policy 0, policy_version 73025 (0.0007) -[2024-07-05 15:40:05,499][03976] Updated weights for policy 0, policy_version 73035 (0.0007) -[2024-07-05 15:40:06,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48332.6, 300 sec: 48485.5). Total num frames: 578322432. Throughput: 0: 12074.0. Samples: 32047280. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:40:06,169][03423] Avg episode reward: [(0, '54.594')] -[2024-07-05 15:40:06,174][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000073039_578330624.pth... -[2024-07-05 15:40:06,241][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000071617_566681600.pth -[2024-07-05 15:40:07,176][03976] Updated weights for policy 0, policy_version 73045 (0.0007) -[2024-07-05 15:40:08,859][03976] Updated weights for policy 0, policy_version 73055 (0.0007) -[2024-07-05 15:40:10,569][03976] Updated weights for policy 0, policy_version 73065 (0.0007) -[2024-07-05 15:40:11,168][03423] Fps is (10 sec: 49151.6, 60 sec: 48332.8, 300 sec: 48513.3). Total num frames: 578568192. Throughput: 0: 12102.1. Samples: 32121060. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:11,169][03423] Avg episode reward: [(0, '55.232')] -[2024-07-05 15:40:12,253][03976] Updated weights for policy 0, policy_version 73075 (0.0007) -[2024-07-05 15:40:13,913][03976] Updated weights for policy 0, policy_version 73085 (0.0008) -[2024-07-05 15:40:15,573][03976] Updated weights for policy 0, policy_version 73095 (0.0007) -[2024-07-05 15:40:16,167][03423] Fps is (10 sec: 49153.4, 60 sec: 48469.5, 300 sec: 48541.1). Total num frames: 578813952. Throughput: 0: 12120.0. Samples: 32194140. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:16,168][03423] Avg episode reward: [(0, '56.056')] -[2024-07-05 15:40:17,270][03976] Updated weights for policy 0, policy_version 73105 (0.0009) -[2024-07-05 15:40:18,932][03976] Updated weights for policy 0, policy_version 73115 (0.0008) -[2024-07-05 15:40:20,604][03976] Updated weights for policy 0, policy_version 73125 (0.0011) -[2024-07-05 15:40:21,168][03423] Fps is (10 sec: 49152.0, 60 sec: 48469.3, 300 sec: 48541.0). Total num frames: 579059712. Throughput: 0: 12112.7. Samples: 32230460. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:21,169][03423] Avg episode reward: [(0, '57.289')] -[2024-07-05 15:40:22,261][03976] Updated weights for policy 0, policy_version 73135 (0.0009) -[2024-07-05 15:40:23,930][03976] Updated weights for policy 0, policy_version 73145 (0.0010) -[2024-07-05 15:40:25,574][03976] Updated weights for policy 0, policy_version 73155 (0.0009) -[2024-07-05 15:40:26,168][03423] Fps is (10 sec: 48331.9, 60 sec: 48469.2, 300 sec: 48513.3). Total num frames: 579297280. Throughput: 0: 12185.2. Samples: 32304640. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:26,169][03423] Avg episode reward: [(0, '55.093')] -[2024-07-05 15:40:27,313][03976] Updated weights for policy 0, policy_version 73165 (0.0008) -[2024-07-05 15:40:28,971][03976] Updated weights for policy 0, policy_version 73175 (0.0008) -[2024-07-05 15:40:30,624][03976] Updated weights for policy 0, policy_version 73185 (0.0007) -[2024-07-05 15:40:31,167][03423] Fps is (10 sec: 48333.1, 60 sec: 48470.9, 300 sec: 48513.3). Total num frames: 579543040. Throughput: 0: 12208.9. Samples: 32377720. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:31,169][03423] Avg episode reward: [(0, '53.293')] -[2024-07-05 15:40:32,325][03976] Updated weights for policy 0, policy_version 73195 (0.0007) -[2024-07-05 15:40:34,051][03976] Updated weights for policy 0, policy_version 73205 (0.0007) -[2024-07-05 15:40:35,760][03976] Updated weights for policy 0, policy_version 73215 (0.0008) -[2024-07-05 15:40:36,167][03423] Fps is (10 sec: 49153.0, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 579788800. Throughput: 0: 12209.9. Samples: 32414288. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:36,168][03423] Avg episode reward: [(0, '52.147')] -[2024-07-05 15:40:37,460][03976] Updated weights for policy 0, policy_version 73225 (0.0008) -[2024-07-05 15:40:39,184][03976] Updated weights for policy 0, policy_version 73235 (0.0007) -[2024-07-05 15:40:40,891][03976] Updated weights for policy 0, policy_version 73245 (0.0011) -[2024-07-05 15:40:41,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48605.9, 300 sec: 48541.1). Total num frames: 580026368. Throughput: 0: 12171.9. Samples: 32486036. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:41,169][03423] Avg episode reward: [(0, '55.639')] -[2024-07-05 15:40:42,558][03976] Updated weights for policy 0, policy_version 73255 (0.0007) -[2024-07-05 15:40:44,235][03976] Updated weights for policy 0, policy_version 73265 (0.0008) -[2024-07-05 15:40:45,938][03976] Updated weights for policy 0, policy_version 73275 (0.0008) -[2024-07-05 15:40:46,168][03423] Fps is (10 sec: 48332.5, 60 sec: 48742.4, 300 sec: 48568.8). Total num frames: 580272128. Throughput: 0: 12171.2. Samples: 32558816. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:46,169][03423] Avg episode reward: [(0, '55.276')] -[2024-07-05 15:40:47,639][03976] Updated weights for policy 0, policy_version 73285 (0.0011) -[2024-07-05 15:40:49,314][03976] Updated weights for policy 0, policy_version 73295 (0.0007) -[2024-07-05 15:40:50,977][03976] Updated weights for policy 0, policy_version 73305 (0.0008) -[2024-07-05 15:40:51,167][03423] Fps is (10 sec: 48333.1, 60 sec: 48606.1, 300 sec: 48541.1). Total num frames: 580509696. Throughput: 0: 12161.6. Samples: 32594548. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:40:51,168][03423] Avg episode reward: [(0, '53.045')] -[2024-07-05 15:40:52,690][03976] Updated weights for policy 0, policy_version 73315 (0.0008) -[2024-07-05 15:40:54,392][03976] Updated weights for policy 0, policy_version 73325 (0.0007) -[2024-07-05 15:40:56,061][03976] Updated weights for policy 0, policy_version 73335 (0.0008) -[2024-07-05 15:40:56,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48606.0, 300 sec: 48568.8). Total num frames: 580755456. Throughput: 0: 12145.5. Samples: 32667608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:40:56,169][03423] Avg episode reward: [(0, '56.377')] -[2024-07-05 15:40:57,723][03976] Updated weights for policy 0, policy_version 73345 (0.0008) -[2024-07-05 15:40:59,436][03976] Updated weights for policy 0, policy_version 73355 (0.0009) -[2024-07-05 15:41:01,142][03976] Updated weights for policy 0, policy_version 73365 (0.0008) -[2024-07-05 15:41:01,167][03423] Fps is (10 sec: 49151.7, 60 sec: 48742.4, 300 sec: 48568.8). Total num frames: 581001216. Throughput: 0: 12144.0. Samples: 32740620. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:01,168][03423] Avg episode reward: [(0, '56.599')] -[2024-07-05 15:41:02,808][03976] Updated weights for policy 0, policy_version 73375 (0.0008) -[2024-07-05 15:41:04,472][03976] Updated weights for policy 0, policy_version 73385 (0.0008) -[2024-07-05 15:41:06,150][03976] Updated weights for policy 0, policy_version 73395 (0.0007) -[2024-07-05 15:41:06,167][03423] Fps is (10 sec: 49152.3, 60 sec: 48742.6, 300 sec: 48569.5). Total num frames: 581246976. Throughput: 0: 12146.2. Samples: 32777040. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:06,168][03423] Avg episode reward: [(0, '57.171')] -[2024-07-05 15:41:07,887][03976] Updated weights for policy 0, policy_version 73405 (0.0008) -[2024-07-05 15:41:09,671][03976] Updated weights for policy 0, policy_version 73415 (0.0008) -[2024-07-05 15:41:11,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48605.9, 300 sec: 48568.8). Total num frames: 581484544. Throughput: 0: 12097.0. Samples: 32849004. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:11,168][03423] Avg episode reward: [(0, '55.715')] -[2024-07-05 15:41:11,367][03976] Updated weights for policy 0, policy_version 73425 (0.0009) -[2024-07-05 15:41:13,037][03976] Updated weights for policy 0, policy_version 73435 (0.0008) -[2024-07-05 15:41:14,733][03976] Updated weights for policy 0, policy_version 73445 (0.0008) -[2024-07-05 15:41:16,168][03423] Fps is (10 sec: 47513.4, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 581722112. Throughput: 0: 12076.0. Samples: 32921140. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:16,169][03423] Avg episode reward: [(0, '55.956')] -[2024-07-05 15:41:16,412][03976] Updated weights for policy 0, policy_version 73455 (0.0008) -[2024-07-05 15:41:18,097][03976] Updated weights for policy 0, policy_version 73465 (0.0007) -[2024-07-05 15:41:19,788][03976] Updated weights for policy 0, policy_version 73475 (0.0008) -[2024-07-05 15:41:21,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 581967872. Throughput: 0: 12074.5. Samples: 32957644. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:21,169][03423] Avg episode reward: [(0, '55.307')] -[2024-07-05 15:41:21,451][03976] Updated weights for policy 0, policy_version 73485 (0.0008) -[2024-07-05 15:41:23,126][03976] Updated weights for policy 0, policy_version 73495 (0.0007) -[2024-07-05 15:41:24,764][03976] Updated weights for policy 0, policy_version 73505 (0.0009) -[2024-07-05 15:41:26,168][03423] Fps is (10 sec: 49149.0, 60 sec: 48605.5, 300 sec: 48568.7). Total num frames: 582213632. Throughput: 0: 12115.5. Samples: 33031240. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:26,169][03423] Avg episode reward: [(0, '57.195')] -[2024-07-05 15:41:26,489][03976] Updated weights for policy 0, policy_version 73515 (0.0007) -[2024-07-05 15:41:28,192][03976] Updated weights for policy 0, policy_version 73525 (0.0007) -[2024-07-05 15:41:29,974][03976] Updated weights for policy 0, policy_version 73535 (0.0007) -[2024-07-05 15:41:31,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 582451200. Throughput: 0: 12084.2. Samples: 33102604. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:31,169][03423] Avg episode reward: [(0, '53.946')] -[2024-07-05 15:41:31,686][03976] Updated weights for policy 0, policy_version 73545 (0.0008) -[2024-07-05 15:41:33,363][03976] Updated weights for policy 0, policy_version 73555 (0.0009) -[2024-07-05 15:41:35,043][03976] Updated weights for policy 0, policy_version 73565 (0.0008) -[2024-07-05 15:41:36,168][03423] Fps is (10 sec: 48335.8, 60 sec: 48469.3, 300 sec: 48541.1). Total num frames: 582696960. Throughput: 0: 12104.9. Samples: 33139268. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) -[2024-07-05 15:41:36,168][03423] Avg episode reward: [(0, '54.878')] -[2024-07-05 15:41:36,719][03976] Updated weights for policy 0, policy_version 73575 (0.0007) -[2024-07-05 15:41:38,361][03976] Updated weights for policy 0, policy_version 73585 (0.0007) -[2024-07-05 15:41:40,006][03976] Updated weights for policy 0, policy_version 73595 (0.0008) -[2024-07-05 15:41:41,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 582934528. Throughput: 0: 12111.1. Samples: 33212608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:41:41,168][03423] Avg episode reward: [(0, '53.051')] -[2024-07-05 15:41:41,721][03976] Updated weights for policy 0, policy_version 73605 (0.0008) -[2024-07-05 15:41:43,453][03976] Updated weights for policy 0, policy_version 73615 (0.0011) -[2024-07-05 15:41:45,172][03976] Updated weights for policy 0, policy_version 73625 (0.0008) -[2024-07-05 15:41:46,168][03423] Fps is (10 sec: 47513.0, 60 sec: 48332.7, 300 sec: 48485.5). Total num frames: 583172096. Throughput: 0: 12082.6. Samples: 33284340. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:41:46,169][03423] Avg episode reward: [(0, '55.566')] -[2024-07-05 15:41:46,907][03976] Updated weights for policy 0, policy_version 73635 (0.0007) -[2024-07-05 15:41:48,584][03976] Updated weights for policy 0, policy_version 73645 (0.0009) -[2024-07-05 15:41:50,248][03976] Updated weights for policy 0, policy_version 73655 (0.0010) -[2024-07-05 15:41:51,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48513.3). Total num frames: 583417856. Throughput: 0: 12080.3. Samples: 33320652. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:41:51,169][03423] Avg episode reward: [(0, '52.480')] -[2024-07-05 15:41:51,932][03976] Updated weights for policy 0, policy_version 73665 (0.0007) -[2024-07-05 15:41:53,643][03976] Updated weights for policy 0, policy_version 73675 (0.0007) -[2024-07-05 15:41:55,400][03976] Updated weights for policy 0, policy_version 73685 (0.0010) -[2024-07-05 15:41:56,167][03423] Fps is (10 sec: 48333.8, 60 sec: 48332.9, 300 sec: 48485.6). Total num frames: 583655424. Throughput: 0: 12089.3. Samples: 33393020. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:41:56,168][03423] Avg episode reward: [(0, '55.880')] -[2024-07-05 15:41:57,126][03976] Updated weights for policy 0, policy_version 73695 (0.0009) -[2024-07-05 15:41:58,795][03976] Updated weights for policy 0, policy_version 73705 (0.0008) -[2024-07-05 15:42:00,424][03976] Updated weights for policy 0, policy_version 73715 (0.0007) -[2024-07-05 15:42:01,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48332.8, 300 sec: 48513.3). Total num frames: 583901184. Throughput: 0: 12100.4. Samples: 33465660. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:01,169][03423] Avg episode reward: [(0, '54.856')] -[2024-07-05 15:42:02,160][03976] Updated weights for policy 0, policy_version 73725 (0.0010) -[2024-07-05 15:42:03,865][03976] Updated weights for policy 0, policy_version 73735 (0.0007) -[2024-07-05 15:42:05,532][03976] Updated weights for policy 0, policy_version 73745 (0.0007) -[2024-07-05 15:42:06,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.3, 300 sec: 48485.6). Total num frames: 584138752. Throughput: 0: 12086.3. Samples: 33501528. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:06,169][03423] Avg episode reward: [(0, '52.930')] -[2024-07-05 15:42:06,218][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000073749_584146944.pth... -[2024-07-05 15:42:06,284][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000072329_572514304.pth -[2024-07-05 15:42:07,208][03976] Updated weights for policy 0, policy_version 73755 (0.0010) -[2024-07-05 15:42:08,883][03976] Updated weights for policy 0, policy_version 73765 (0.0008) -[2024-07-05 15:42:10,547][03976] Updated weights for policy 0, policy_version 73775 (0.0010) -[2024-07-05 15:42:11,167][03423] Fps is (10 sec: 48333.2, 60 sec: 48332.8, 300 sec: 48485.5). Total num frames: 584384512. Throughput: 0: 12081.7. Samples: 33574908. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:11,168][03423] Avg episode reward: [(0, '54.521')] -[2024-07-05 15:42:12,203][03976] Updated weights for policy 0, policy_version 73785 (0.0009) -[2024-07-05 15:42:13,871][03976] Updated weights for policy 0, policy_version 73795 (0.0008) -[2024-07-05 15:42:15,553][03976] Updated weights for policy 0, policy_version 73805 (0.0008) -[2024-07-05 15:42:16,167][03423] Fps is (10 sec: 49151.8, 60 sec: 48469.4, 300 sec: 48513.3). Total num frames: 584630272. Throughput: 0: 12130.8. Samples: 33648488. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:16,169][03423] Avg episode reward: [(0, '56.605')] -[2024-07-05 15:42:17,267][03976] Updated weights for policy 0, policy_version 73815 (0.0012) -[2024-07-05 15:42:18,942][03976] Updated weights for policy 0, policy_version 73825 (0.0008) -[2024-07-05 15:42:20,711][03976] Updated weights for policy 0, policy_version 73835 (0.0007) -[2024-07-05 15:42:21,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48332.9, 300 sec: 48485.6). Total num frames: 584867840. Throughput: 0: 12115.8. Samples: 33684480. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:21,169][03423] Avg episode reward: [(0, '55.942')] -[2024-07-05 15:42:22,369][03976] Updated weights for policy 0, policy_version 73845 (0.0008) -[2024-07-05 15:42:24,097][03976] Updated weights for policy 0, policy_version 73855 (0.0007) -[2024-07-05 15:42:25,769][03976] Updated weights for policy 0, policy_version 73865 (0.0007) -[2024-07-05 15:42:26,167][03423] Fps is (10 sec: 48332.8, 60 sec: 48333.3, 300 sec: 48485.5). Total num frames: 585113600. Throughput: 0: 12082.0. Samples: 33756300. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:26,168][03423] Avg episode reward: [(0, '55.099')] -[2024-07-05 15:42:27,462][03976] Updated weights for policy 0, policy_version 73875 (0.0008) -[2024-07-05 15:42:29,136][03976] Updated weights for policy 0, policy_version 73885 (0.0007) -[2024-07-05 15:42:30,909][03976] Updated weights for policy 0, policy_version 73895 (0.0008) -[2024-07-05 15:42:31,168][03423] Fps is (10 sec: 48331.3, 60 sec: 48332.6, 300 sec: 48457.7). Total num frames: 585351168. Throughput: 0: 12111.7. Samples: 33829368. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:31,169][03423] Avg episode reward: [(0, '54.323')] -[2024-07-05 15:42:32,596][03976] Updated weights for policy 0, policy_version 73905 (0.0008) -[2024-07-05 15:42:34,287][03976] Updated weights for policy 0, policy_version 73915 (0.0011) -[2024-07-05 15:42:35,968][03976] Updated weights for policy 0, policy_version 73925 (0.0007) -[2024-07-05 15:42:36,168][03423] Fps is (10 sec: 48332.0, 60 sec: 48332.7, 300 sec: 48457.7). Total num frames: 585596928. Throughput: 0: 12110.1. Samples: 33865608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:36,169][03423] Avg episode reward: [(0, '55.651')] -[2024-07-05 15:42:37,668][03976] Updated weights for policy 0, policy_version 73935 (0.0008) -[2024-07-05 15:42:39,333][03976] Updated weights for policy 0, policy_version 73945 (0.0008) -[2024-07-05 15:42:41,041][03976] Updated weights for policy 0, policy_version 73955 (0.0007) -[2024-07-05 15:42:41,167][03423] Fps is (10 sec: 48334.4, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 585834496. Throughput: 0: 12103.6. Samples: 33937684. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:41,168][03423] Avg episode reward: [(0, '53.653')] -[2024-07-05 15:42:42,741][03976] Updated weights for policy 0, policy_version 73965 (0.0007) -[2024-07-05 15:42:44,417][03976] Updated weights for policy 0, policy_version 73975 (0.0007) -[2024-07-05 15:42:46,086][03976] Updated weights for policy 0, policy_version 73985 (0.0007) -[2024-07-05 15:42:46,167][03423] Fps is (10 sec: 48333.6, 60 sec: 48469.5, 300 sec: 48457.8). Total num frames: 586080256. Throughput: 0: 12118.6. Samples: 34010996. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:46,168][03423] Avg episode reward: [(0, '56.136')] -[2024-07-05 15:42:47,773][03976] Updated weights for policy 0, policy_version 73995 (0.0008) -[2024-07-05 15:42:49,459][03976] Updated weights for policy 0, policy_version 74005 (0.0007) -[2024-07-05 15:42:51,168][03423] Fps is (10 sec: 48332.3, 60 sec: 48332.7, 300 sec: 48457.8). Total num frames: 586317824. Throughput: 0: 12128.3. Samples: 34047304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:51,169][03423] Avg episode reward: [(0, '54.209')] -[2024-07-05 15:42:51,192][03976] Updated weights for policy 0, policy_version 74015 (0.0007) -[2024-07-05 15:42:52,857][03976] Updated weights for policy 0, policy_version 74025 (0.0009) -[2024-07-05 15:42:54,553][03976] Updated weights for policy 0, policy_version 74035 (0.0008) -[2024-07-05 15:42:56,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48469.2, 300 sec: 48457.8). Total num frames: 586563584. Throughput: 0: 12102.1. Samples: 34119504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:42:56,169][03423] Avg episode reward: [(0, '53.605')] -[2024-07-05 15:42:56,249][03976] Updated weights for policy 0, policy_version 74045 (0.0007) -[2024-07-05 15:42:57,930][03976] Updated weights for policy 0, policy_version 74055 (0.0010) -[2024-07-05 15:42:59,684][03976] Updated weights for policy 0, policy_version 74065 (0.0010) -[2024-07-05 15:43:01,167][03423] Fps is (10 sec: 48333.3, 60 sec: 48332.9, 300 sec: 48457.8). Total num frames: 586801152. Throughput: 0: 12063.3. Samples: 34191336. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:43:01,169][03423] Avg episode reward: [(0, '56.439')] -[2024-07-05 15:43:01,417][03976] Updated weights for policy 0, policy_version 74075 (0.0008) -[2024-07-05 15:43:03,051][03976] Updated weights for policy 0, policy_version 74085 (0.0008) -[2024-07-05 15:43:04,749][03976] Updated weights for policy 0, policy_version 74095 (0.0008) -[2024-07-05 15:43:06,168][03423] Fps is (10 sec: 48332.9, 60 sec: 48469.3, 300 sec: 48457.8). Total num frames: 587046912. Throughput: 0: 12077.4. Samples: 34227964. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:43:06,169][03423] Avg episode reward: [(0, '55.268')] -[2024-07-05 15:43:06,440][03976] Updated weights for policy 0, policy_version 74105 (0.0008) -[2024-07-05 15:43:08,117][03976] Updated weights for policy 0, policy_version 74115 (0.0009) -[2024-07-05 15:43:09,879][03976] Updated weights for policy 0, policy_version 74125 (0.0008) -[2024-07-05 15:43:11,168][03423] Fps is (10 sec: 48332.2, 60 sec: 48332.7, 300 sec: 48430.0). Total num frames: 587284480. Throughput: 0: 12088.9. Samples: 34300300. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:11,169][03423] Avg episode reward: [(0, '54.949')] -[2024-07-05 15:43:11,593][03976] Updated weights for policy 0, policy_version 74135 (0.0010) -[2024-07-05 15:43:13,261][03976] Updated weights for policy 0, policy_version 74145 (0.0008) -[2024-07-05 15:43:14,966][03976] Updated weights for policy 0, policy_version 74155 (0.0007) -[2024-07-05 15:43:16,168][03423] Fps is (10 sec: 47513.5, 60 sec: 48196.2, 300 sec: 48430.0). Total num frames: 587522048. Throughput: 0: 12062.8. Samples: 34372192. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:16,169][03423] Avg episode reward: [(0, '53.853')] -[2024-07-05 15:43:16,641][03976] Updated weights for policy 0, policy_version 74165 (0.0008) -[2024-07-05 15:43:18,322][03976] Updated weights for policy 0, policy_version 74175 (0.0008) -[2024-07-05 15:43:20,038][03976] Updated weights for policy 0, policy_version 74185 (0.0008) -[2024-07-05 15:43:21,168][03423] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 587767808. Throughput: 0: 12067.5. Samples: 34408644. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:21,169][03423] Avg episode reward: [(0, '55.823')] -[2024-07-05 15:43:21,754][03976] Updated weights for policy 0, policy_version 74195 (0.0010) -[2024-07-05 15:43:23,427][03976] Updated weights for policy 0, policy_version 74205 (0.0010) -[2024-07-05 15:43:25,134][03976] Updated weights for policy 0, policy_version 74215 (0.0008) -[2024-07-05 15:43:26,168][03423] Fps is (10 sec: 49152.1, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 588013568. Throughput: 0: 12086.4. Samples: 34481572. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:26,169][03423] Avg episode reward: [(0, '55.682')] -[2024-07-05 15:43:26,824][03976] Updated weights for policy 0, policy_version 74225 (0.0008) -[2024-07-05 15:43:28,500][03976] Updated weights for policy 0, policy_version 74235 (0.0007) -[2024-07-05 15:43:30,164][03976] Updated weights for policy 0, policy_version 74245 (0.0008) -[2024-07-05 15:43:31,167][03423] Fps is (10 sec: 49152.4, 60 sec: 48469.6, 300 sec: 48485.5). Total num frames: 588259328. Throughput: 0: 12079.1. Samples: 34554556. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:31,168][03423] Avg episode reward: [(0, '55.698')] -[2024-07-05 15:43:31,840][03976] Updated weights for policy 0, policy_version 74255 (0.0007) -[2024-07-05 15:43:33,516][03976] Updated weights for policy 0, policy_version 74265 (0.0007) -[2024-07-05 15:43:35,217][03976] Updated weights for policy 0, policy_version 74275 (0.0008) -[2024-07-05 15:43:36,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48332.9, 300 sec: 48457.8). Total num frames: 588496896. Throughput: 0: 12084.3. Samples: 34591096. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:36,169][03423] Avg episode reward: [(0, '55.852')] -[2024-07-05 15:43:36,853][03976] Updated weights for policy 0, policy_version 74285 (0.0007) -[2024-07-05 15:43:38,532][03976] Updated weights for policy 0, policy_version 74295 (0.0010) -[2024-07-05 15:43:40,217][03976] Updated weights for policy 0, policy_version 74305 (0.0007) -[2024-07-05 15:43:41,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48469.3, 300 sec: 48485.5). Total num frames: 588742656. Throughput: 0: 12115.0. Samples: 34664680. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:41,169][03423] Avg episode reward: [(0, '56.174')] -[2024-07-05 15:43:41,946][03976] Updated weights for policy 0, policy_version 74315 (0.0007) -[2024-07-05 15:43:43,653][03976] Updated weights for policy 0, policy_version 74325 (0.0007) -[2024-07-05 15:43:45,357][03976] Updated weights for policy 0, policy_version 74335 (0.0011) -[2024-07-05 15:43:46,168][03423] Fps is (10 sec: 48332.8, 60 sec: 48332.8, 300 sec: 48457.8). Total num frames: 588980224. Throughput: 0: 12124.0. Samples: 34736916. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:46,169][03423] Avg episode reward: [(0, '54.117')] -[2024-07-05 15:43:47,068][03976] Updated weights for policy 0, policy_version 74345 (0.0008) -[2024-07-05 15:43:48,763][03976] Updated weights for policy 0, policy_version 74355 (0.0007) -[2024-07-05 15:43:50,499][03976] Updated weights for policy 0, policy_version 74365 (0.0009) -[2024-07-05 15:43:51,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48332.9, 300 sec: 48402.2). Total num frames: 589217792. Throughput: 0: 12106.3. Samples: 34772748. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) -[2024-07-05 15:43:51,169][03423] Avg episode reward: [(0, '56.454')] -[2024-07-05 15:43:52,269][03976] Updated weights for policy 0, policy_version 74375 (0.0009) -[2024-07-05 15:43:54,014][03976] Updated weights for policy 0, policy_version 74385 (0.0008) -[2024-07-05 15:43:55,753][03976] Updated weights for policy 0, policy_version 74395 (0.0008) -[2024-07-05 15:43:56,168][03423] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48402.2). Total num frames: 589455360. Throughput: 0: 12056.9. Samples: 34842860. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:43:56,169][03423] Avg episode reward: [(0, '54.610')] -[2024-07-05 15:43:57,472][03976] Updated weights for policy 0, policy_version 74405 (0.0007) -[2024-07-05 15:43:59,207][03976] Updated weights for policy 0, policy_version 74415 (0.0009) -[2024-07-05 15:44:00,914][03976] Updated weights for policy 0, policy_version 74425 (0.0008) -[2024-07-05 15:44:01,167][03423] Fps is (10 sec: 47513.8, 60 sec: 48196.3, 300 sec: 48374.5). Total num frames: 589692928. Throughput: 0: 12046.2. Samples: 34914272. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:01,169][03423] Avg episode reward: [(0, '51.871')] -[2024-07-05 15:44:02,606][03976] Updated weights for policy 0, policy_version 74435 (0.0008) -[2024-07-05 15:44:04,278][03976] Updated weights for policy 0, policy_version 74445 (0.0010) -[2024-07-05 15:44:06,009][03976] Updated weights for policy 0, policy_version 74455 (0.0012) -[2024-07-05 15:44:06,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48196.3, 300 sec: 48374.5). Total num frames: 589938688. Throughput: 0: 12031.5. Samples: 34950060. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:06,168][03423] Avg episode reward: [(0, '54.885')] -[2024-07-05 15:44:06,172][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000074456_589938688.pth... -[2024-07-05 15:44:06,244][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000073039_578330624.pth -[2024-07-05 15:44:07,708][03976] Updated weights for policy 0, policy_version 74465 (0.0008) -[2024-07-05 15:44:09,393][03976] Updated weights for policy 0, policy_version 74475 (0.0007) -[2024-07-05 15:44:11,088][03976] Updated weights for policy 0, policy_version 74485 (0.0008) -[2024-07-05 15:44:11,167][03423] Fps is (10 sec: 48332.7, 60 sec: 48196.4, 300 sec: 48374.5). Total num frames: 590176256. Throughput: 0: 12018.2. Samples: 35022392. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:11,169][03423] Avg episode reward: [(0, '54.073')] -[2024-07-05 15:44:12,844][03976] Updated weights for policy 0, policy_version 74495 (0.0010) -[2024-07-05 15:44:14,561][03976] Updated weights for policy 0, policy_version 74505 (0.0011) -[2024-07-05 15:44:16,167][03423] Fps is (10 sec: 47513.7, 60 sec: 48196.3, 300 sec: 48346.7). Total num frames: 590413824. Throughput: 0: 12004.9. Samples: 35094776. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:16,168][03423] Avg episode reward: [(0, '53.506')] -[2024-07-05 15:44:16,214][03976] Updated weights for policy 0, policy_version 74515 (0.0008) -[2024-07-05 15:44:17,915][03976] Updated weights for policy 0, policy_version 74525 (0.0008) -[2024-07-05 15:44:19,575][03976] Updated weights for policy 0, policy_version 74535 (0.0007) -[2024-07-05 15:44:21,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48196.2, 300 sec: 48374.4). Total num frames: 590659584. Throughput: 0: 12000.4. Samples: 35131112. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:21,169][03423] Avg episode reward: [(0, '55.010')] -[2024-07-05 15:44:21,269][03976] Updated weights for policy 0, policy_version 74545 (0.0008) -[2024-07-05 15:44:22,953][03976] Updated weights for policy 0, policy_version 74555 (0.0008) -[2024-07-05 15:44:24,628][03976] Updated weights for policy 0, policy_version 74565 (0.0012) -[2024-07-05 15:44:26,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 48347.0). Total num frames: 590897152. Throughput: 0: 11988.5. Samples: 35204164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:26,168][03423] Avg episode reward: [(0, '56.523')] -[2024-07-05 15:44:26,340][03976] Updated weights for policy 0, policy_version 74575 (0.0007) -[2024-07-05 15:44:28,022][03976] Updated weights for policy 0, policy_version 74585 (0.0008) -[2024-07-05 15:44:29,689][03976] Updated weights for policy 0, policy_version 74595 (0.0007) -[2024-07-05 15:44:31,168][03423] Fps is (10 sec: 48333.0, 60 sec: 48059.7, 300 sec: 48374.5). Total num frames: 591142912. Throughput: 0: 12001.6. Samples: 35276988. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:31,169][03423] Avg episode reward: [(0, '54.672')] -[2024-07-05 15:44:31,349][03976] Updated weights for policy 0, policy_version 74605 (0.0008) -[2024-07-05 15:44:33,020][03976] Updated weights for policy 0, policy_version 74615 (0.0007) -[2024-07-05 15:44:34,714][03976] Updated weights for policy 0, policy_version 74625 (0.0010) -[2024-07-05 15:44:36,168][03423] Fps is (10 sec: 49151.7, 60 sec: 48196.2, 300 sec: 48402.2). Total num frames: 591388672. Throughput: 0: 12017.4. Samples: 35313532. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) -[2024-07-05 15:44:36,169][03423] Avg episode reward: [(0, '53.483')] -[2024-07-05 15:44:36,402][03976] Updated weights for policy 0, policy_version 74635 (0.0008) -[2024-07-05 15:44:38,192][03976] Updated weights for policy 0, policy_version 74645 (0.0010) -[2024-07-05 15:44:39,940][03976] Updated weights for policy 0, policy_version 74655 (0.0008) -[2024-07-05 15:44:41,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48059.7, 300 sec: 48402.2). Total num frames: 591626240. Throughput: 0: 12063.8. Samples: 35385732. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:44:41,169][03423] Avg episode reward: [(0, '55.064')] -[2024-07-05 15:44:41,595][03976] Updated weights for policy 0, policy_version 74665 (0.0007) -[2024-07-05 15:44:43,243][03976] Updated weights for policy 0, policy_version 74675 (0.0010) -[2024-07-05 15:44:44,922][03976] Updated weights for policy 0, policy_version 74685 (0.0008) -[2024-07-05 15:44:46,167][03423] Fps is (10 sec: 48333.3, 60 sec: 48196.3, 300 sec: 48402.3). Total num frames: 591872000. Throughput: 0: 12101.6. Samples: 35458844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:44:46,168][03423] Avg episode reward: [(0, '54.356')] -[2024-07-05 15:44:46,549][03976] Updated weights for policy 0, policy_version 74695 (0.0007) -[2024-07-05 15:44:48,225][03976] Updated weights for policy 0, policy_version 74705 (0.0007) -[2024-07-05 15:44:49,870][03976] Updated weights for policy 0, policy_version 74715 (0.0007) -[2024-07-05 15:44:51,168][03423] Fps is (10 sec: 49151.3, 60 sec: 48332.6, 300 sec: 48402.2). Total num frames: 592117760. Throughput: 0: 12131.3. Samples: 35495972. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:44:51,169][03423] Avg episode reward: [(0, '55.473')] -[2024-07-05 15:44:51,569][03976] Updated weights for policy 0, policy_version 74725 (0.0010) -[2024-07-05 15:44:53,315][03976] Updated weights for policy 0, policy_version 74735 (0.0008) -[2024-07-05 15:44:55,002][03976] Updated weights for policy 0, policy_version 74745 (0.0007) -[2024-07-05 15:44:56,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 48402.2). Total num frames: 592355328. Throughput: 0: 12128.2. Samples: 35568160. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:44:56,168][03423] Avg episode reward: [(0, '54.956')] -[2024-07-05 15:44:56,717][03976] Updated weights for policy 0, policy_version 74755 (0.0008) -[2024-07-05 15:44:58,432][03976] Updated weights for policy 0, policy_version 74765 (0.0008) -[2024-07-05 15:45:00,145][03976] Updated weights for policy 0, policy_version 74775 (0.0007) -[2024-07-05 15:45:01,168][03423] Fps is (10 sec: 47514.4, 60 sec: 48332.7, 300 sec: 48374.5). Total num frames: 592592896. Throughput: 0: 12112.9. Samples: 35639856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:01,169][03423] Avg episode reward: [(0, '51.890')] -[2024-07-05 15:45:01,901][03976] Updated weights for policy 0, policy_version 74785 (0.0009) -[2024-07-05 15:45:03,571][03976] Updated weights for policy 0, policy_version 74795 (0.0007) -[2024-07-05 15:45:05,262][03976] Updated weights for policy 0, policy_version 74805 (0.0008) -[2024-07-05 15:45:06,167][03423] Fps is (10 sec: 48332.9, 60 sec: 48332.9, 300 sec: 48374.5). Total num frames: 592838656. Throughput: 0: 12112.3. Samples: 35676164. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:06,168][03423] Avg episode reward: [(0, '52.693')] -[2024-07-05 15:45:06,960][03976] Updated weights for policy 0, policy_version 74815 (0.0012) -[2024-07-05 15:45:08,678][03976] Updated weights for policy 0, policy_version 74825 (0.0007) -[2024-07-05 15:45:10,427][03976] Updated weights for policy 0, policy_version 74835 (0.0008) -[2024-07-05 15:45:11,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48346.7). Total num frames: 593076224. Throughput: 0: 12083.6. Samples: 35747924. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:11,169][03423] Avg episode reward: [(0, '54.710')] -[2024-07-05 15:45:12,142][03976] Updated weights for policy 0, policy_version 74845 (0.0009) -[2024-07-05 15:45:13,811][03976] Updated weights for policy 0, policy_version 74855 (0.0008) -[2024-07-05 15:45:15,502][03976] Updated weights for policy 0, policy_version 74865 (0.0007) -[2024-07-05 15:45:16,168][03423] Fps is (10 sec: 47512.9, 60 sec: 48332.7, 300 sec: 48318.9). Total num frames: 593313792. Throughput: 0: 12066.9. Samples: 35820000. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:16,169][03423] Avg episode reward: [(0, '55.795')] -[2024-07-05 15:45:17,220][03976] Updated weights for policy 0, policy_version 74875 (0.0008) -[2024-07-05 15:45:18,938][03976] Updated weights for policy 0, policy_version 74885 (0.0008) -[2024-07-05 15:45:20,675][03976] Updated weights for policy 0, policy_version 74895 (0.0009) -[2024-07-05 15:45:21,168][03423] Fps is (10 sec: 48332.0, 60 sec: 48332.7, 300 sec: 48346.7). Total num frames: 593559552. Throughput: 0: 12059.5. Samples: 35856212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:21,169][03423] Avg episode reward: [(0, '55.950')] -[2024-07-05 15:45:22,351][03976] Updated weights for policy 0, policy_version 74905 (0.0007) -[2024-07-05 15:45:23,998][03976] Updated weights for policy 0, policy_version 74915 (0.0008) -[2024-07-05 15:45:25,685][03976] Updated weights for policy 0, policy_version 74925 (0.0008) -[2024-07-05 15:45:26,167][03423] Fps is (10 sec: 48333.3, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 593797120. Throughput: 0: 12054.6. Samples: 35928188. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:26,169][03423] Avg episode reward: [(0, '54.563')] -[2024-07-05 15:45:27,369][03976] Updated weights for policy 0, policy_version 74935 (0.0010) -[2024-07-05 15:45:29,056][03976] Updated weights for policy 0, policy_version 74945 (0.0009) -[2024-07-05 15:45:30,705][03976] Updated weights for policy 0, policy_version 74955 (0.0009) -[2024-07-05 15:45:31,168][03423] Fps is (10 sec: 48333.5, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 594042880. Throughput: 0: 12061.9. Samples: 36001632. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:31,169][03423] Avg episode reward: [(0, '56.344')] -[2024-07-05 15:45:32,411][03976] Updated weights for policy 0, policy_version 74965 (0.0008) -[2024-07-05 15:45:34,124][03976] Updated weights for policy 0, policy_version 74975 (0.0009) -[2024-07-05 15:45:35,790][03976] Updated weights for policy 0, policy_version 74985 (0.0008) -[2024-07-05 15:45:36,168][03423] Fps is (10 sec: 49151.9, 60 sec: 48332.9, 300 sec: 48346.7). Total num frames: 594288640. Throughput: 0: 12040.4. Samples: 36037788. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:36,169][03423] Avg episode reward: [(0, '54.701')] -[2024-07-05 15:45:37,486][03976] Updated weights for policy 0, policy_version 74995 (0.0008) -[2024-07-05 15:45:39,186][03976] Updated weights for policy 0, policy_version 75005 (0.0009) -[2024-07-05 15:45:40,846][03976] Updated weights for policy 0, policy_version 75015 (0.0009) -[2024-07-05 15:45:41,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48332.8, 300 sec: 48318.9). Total num frames: 594526208. Throughput: 0: 12069.8. Samples: 36111300. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:41,168][03423] Avg episode reward: [(0, '52.992')] -[2024-07-05 15:45:42,480][03976] Updated weights for policy 0, policy_version 75025 (0.0008) -[2024-07-05 15:45:44,194][03976] Updated weights for policy 0, policy_version 75035 (0.0007) -[2024-07-05 15:45:45,867][03976] Updated weights for policy 0, policy_version 75045 (0.0007) -[2024-07-05 15:45:46,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48332.8, 300 sec: 48346.7). Total num frames: 594771968. Throughput: 0: 12092.7. Samples: 36184028. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:46,168][03423] Avg episode reward: [(0, '55.630')] -[2024-07-05 15:45:47,608][03976] Updated weights for policy 0, policy_version 75055 (0.0008) -[2024-07-05 15:45:49,276][03976] Updated weights for policy 0, policy_version 75065 (0.0008) -[2024-07-05 15:45:50,944][03976] Updated weights for policy 0, policy_version 75075 (0.0008) -[2024-07-05 15:45:51,167][03423] Fps is (10 sec: 49152.2, 60 sec: 48333.0, 300 sec: 48346.7). Total num frames: 595017728. Throughput: 0: 12099.1. Samples: 36220624. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:51,169][03423] Avg episode reward: [(0, '52.711')] -[2024-07-05 15:45:52,610][03976] Updated weights for policy 0, policy_version 75085 (0.0007) -[2024-07-05 15:45:54,318][03976] Updated weights for policy 0, policy_version 75095 (0.0007) -[2024-07-05 15:45:56,042][03976] Updated weights for policy 0, policy_version 75105 (0.0008) -[2024-07-05 15:45:56,168][03423] Fps is (10 sec: 49151.1, 60 sec: 48469.2, 300 sec: 48346.7). Total num frames: 595263488. Throughput: 0: 12112.9. Samples: 36293008. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:45:56,168][03423] Avg episode reward: [(0, '55.105')] -[2024-07-05 15:45:57,683][03976] Updated weights for policy 0, policy_version 75115 (0.0009) -[2024-07-05 15:45:59,367][03976] Updated weights for policy 0, policy_version 75125 (0.0008) -[2024-07-05 15:46:01,030][03976] Updated weights for policy 0, policy_version 75135 (0.0011) -[2024-07-05 15:46:01,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48469.3, 300 sec: 48318.9). Total num frames: 595501056. Throughput: 0: 12155.7. Samples: 36367004. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:01,168][03423] Avg episode reward: [(0, '54.393')] -[2024-07-05 15:46:02,705][03976] Updated weights for policy 0, policy_version 75145 (0.0008) -[2024-07-05 15:46:04,364][03976] Updated weights for policy 0, policy_version 75155 (0.0007) -[2024-07-05 15:46:06,004][03976] Updated weights for policy 0, policy_version 75165 (0.0010) -[2024-07-05 15:46:06,167][03423] Fps is (10 sec: 48333.5, 60 sec: 48469.3, 300 sec: 48346.7). Total num frames: 595746816. Throughput: 0: 12168.2. Samples: 36403780. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:06,168][03423] Avg episode reward: [(0, '53.915')] -[2024-07-05 15:46:06,193][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000075166_595755008.pth... -[2024-07-05 15:46:06,267][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000073749_584146944.pth -[2024-07-05 15:46:07,730][03976] Updated weights for policy 0, policy_version 75175 (0.0010) -[2024-07-05 15:46:09,368][03976] Updated weights for policy 0, policy_version 75185 (0.0010) -[2024-07-05 15:46:11,026][03976] Updated weights for policy 0, policy_version 75195 (0.0010) -[2024-07-05 15:46:11,168][03423] Fps is (10 sec: 49152.0, 60 sec: 48605.8, 300 sec: 48374.5). Total num frames: 595992576. Throughput: 0: 12194.8. Samples: 36476956. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:11,168][03423] Avg episode reward: [(0, '54.709')] -[2024-07-05 15:46:12,719][03976] Updated weights for policy 0, policy_version 75205 (0.0008) -[2024-07-05 15:46:14,380][03976] Updated weights for policy 0, policy_version 75215 (0.0008) -[2024-07-05 15:46:16,066][03976] Updated weights for policy 0, policy_version 75225 (0.0008) -[2024-07-05 15:46:16,167][03423] Fps is (10 sec: 49152.1, 60 sec: 48742.5, 300 sec: 48374.5). Total num frames: 596238336. Throughput: 0: 12197.4. Samples: 36550516. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:16,168][03423] Avg episode reward: [(0, '55.415')] -[2024-07-05 15:46:17,769][03976] Updated weights for policy 0, policy_version 75235 (0.0010) -[2024-07-05 15:46:19,428][03976] Updated weights for policy 0, policy_version 75245 (0.0007) -[2024-07-05 15:46:21,064][03976] Updated weights for policy 0, policy_version 75255 (0.0007) -[2024-07-05 15:46:21,167][03423] Fps is (10 sec: 49152.4, 60 sec: 48742.6, 300 sec: 48374.6). Total num frames: 596484096. Throughput: 0: 12207.3. Samples: 36587116. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:21,169][03423] Avg episode reward: [(0, '55.659')] -[2024-07-05 15:46:22,753][03976] Updated weights for policy 0, policy_version 75265 (0.0008) -[2024-07-05 15:46:24,462][03976] Updated weights for policy 0, policy_version 75275 (0.0007) -[2024-07-05 15:46:26,120][03976] Updated weights for policy 0, policy_version 75285 (0.0008) -[2024-07-05 15:46:26,167][03423] Fps is (10 sec: 49152.1, 60 sec: 48879.0, 300 sec: 48402.2). Total num frames: 596729856. Throughput: 0: 12201.9. Samples: 36660384. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:26,168][03423] Avg episode reward: [(0, '55.013')] -[2024-07-05 15:46:27,790][03976] Updated weights for policy 0, policy_version 75295 (0.0008) -[2024-07-05 15:46:29,445][03976] Updated weights for policy 0, policy_version 75305 (0.0007) -[2024-07-05 15:46:31,121][03976] Updated weights for policy 0, policy_version 75315 (0.0010) -[2024-07-05 15:46:31,168][03423] Fps is (10 sec: 49151.7, 60 sec: 48878.9, 300 sec: 48402.2). Total num frames: 596975616. Throughput: 0: 12224.1. Samples: 36734112. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:31,168][03423] Avg episode reward: [(0, '57.652')] -[2024-07-05 15:46:32,743][03976] Updated weights for policy 0, policy_version 75325 (0.0007) -[2024-07-05 15:46:34,379][03976] Updated weights for policy 0, policy_version 75335 (0.0010) -[2024-07-05 15:46:36,065][03976] Updated weights for policy 0, policy_version 75345 (0.0008) -[2024-07-05 15:46:36,168][03423] Fps is (10 sec: 49151.7, 60 sec: 48878.9, 300 sec: 48430.0). Total num frames: 597221376. Throughput: 0: 12234.8. Samples: 36771192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:36,169][03423] Avg episode reward: [(0, '55.284')] -[2024-07-05 15:46:37,747][03976] Updated weights for policy 0, policy_version 75355 (0.0007) -[2024-07-05 15:46:39,404][03976] Updated weights for policy 0, policy_version 75365 (0.0007) -[2024-07-05 15:46:41,065][03976] Updated weights for policy 0, policy_version 75375 (0.0008) -[2024-07-05 15:46:41,167][03423] Fps is (10 sec: 49152.1, 60 sec: 49015.5, 300 sec: 48457.8). Total num frames: 597467136. Throughput: 0: 12273.5. Samples: 36845312. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:41,168][03423] Avg episode reward: [(0, '56.245')] -[2024-07-05 15:46:42,723][03976] Updated weights for policy 0, policy_version 75385 (0.0009) -[2024-07-05 15:46:44,445][03976] Updated weights for policy 0, policy_version 75395 (0.0007) -[2024-07-05 15:46:46,114][03976] Updated weights for policy 0, policy_version 75405 (0.0009) -[2024-07-05 15:46:46,168][03423] Fps is (10 sec: 49152.0, 60 sec: 49015.4, 300 sec: 48457.8). Total num frames: 597712896. Throughput: 0: 12262.5. Samples: 36918816. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:46,169][03423] Avg episode reward: [(0, '56.658')] -[2024-07-05 15:46:47,807][03976] Updated weights for policy 0, policy_version 75415 (0.0010) -[2024-07-05 15:46:49,520][03976] Updated weights for policy 0, policy_version 75425 (0.0008) -[2024-07-05 15:46:51,167][03423] Fps is (10 sec: 48333.0, 60 sec: 48879.0, 300 sec: 48457.8). Total num frames: 597950464. Throughput: 0: 12245.9. Samples: 36954844. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) -[2024-07-05 15:46:51,168][03423] Avg episode reward: [(0, '54.088')] -[2024-07-05 15:46:51,193][03976] Updated weights for policy 0, policy_version 75435 (0.0008) -[2024-07-05 15:46:52,896][03976] Updated weights for policy 0, policy_version 75445 (0.0009) -[2024-07-05 15:46:54,558][03976] Updated weights for policy 0, policy_version 75455 (0.0010) -[2024-07-05 15:46:56,168][03423] Fps is (10 sec: 48332.6, 60 sec: 48879.0, 300 sec: 48457.8). Total num frames: 598196224. Throughput: 0: 12242.4. Samples: 37027864. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:46:56,168][03423] Avg episode reward: [(0, '56.858')] -[2024-07-05 15:46:56,261][03976] Updated weights for policy 0, policy_version 75465 (0.0008) -[2024-07-05 15:46:57,934][03976] Updated weights for policy 0, policy_version 75475 (0.0010) -[2024-07-05 15:46:59,604][03976] Updated weights for policy 0, policy_version 75485 (0.0008) -[2024-07-05 15:47:01,168][03423] Fps is (10 sec: 49151.7, 60 sec: 49015.5, 300 sec: 48485.5). Total num frames: 598441984. Throughput: 0: 12229.9. Samples: 37100860. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:01,169][03423] Avg episode reward: [(0, '54.779')] -[2024-07-05 15:47:01,344][03976] Updated weights for policy 0, policy_version 75495 (0.0008) -[2024-07-05 15:47:03,061][03976] Updated weights for policy 0, policy_version 75505 (0.0012) -[2024-07-05 15:47:04,790][03976] Updated weights for policy 0, policy_version 75515 (0.0011) -[2024-07-05 15:47:06,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48878.8, 300 sec: 48457.7). Total num frames: 598679552. Throughput: 0: 12209.4. Samples: 37136540. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:06,169][03423] Avg episode reward: [(0, '54.798')] -[2024-07-05 15:47:06,491][03976] Updated weights for policy 0, policy_version 75525 (0.0010) -[2024-07-05 15:47:08,162][03976] Updated weights for policy 0, policy_version 75535 (0.0007) -[2024-07-05 15:47:09,841][03976] Updated weights for policy 0, policy_version 75545 (0.0007) -[2024-07-05 15:47:11,168][03423] Fps is (10 sec: 47513.6, 60 sec: 48742.4, 300 sec: 48430.0). Total num frames: 598917120. Throughput: 0: 12176.8. Samples: 37208340. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:11,169][03423] Avg episode reward: [(0, '53.910')] -[2024-07-05 15:47:11,513][03976] Updated weights for policy 0, policy_version 75555 (0.0008) -[2024-07-05 15:47:13,245][03976] Updated weights for policy 0, policy_version 75565 (0.0009) -[2024-07-05 15:47:14,961][03976] Updated weights for policy 0, policy_version 75575 (0.0008) -[2024-07-05 15:47:16,168][03423] Fps is (10 sec: 48332.4, 60 sec: 48742.2, 300 sec: 48457.7). Total num frames: 599162880. Throughput: 0: 12151.4. Samples: 37280928. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:16,169][03423] Avg episode reward: [(0, '56.013')] -[2024-07-05 15:47:16,644][03976] Updated weights for policy 0, policy_version 75585 (0.0008) -[2024-07-05 15:47:18,305][03976] Updated weights for policy 0, policy_version 75595 (0.0009) -[2024-07-05 15:47:19,983][03976] Updated weights for policy 0, policy_version 75605 (0.0009) -[2024-07-05 15:47:21,168][03423] Fps is (10 sec: 48332.7, 60 sec: 48605.8, 300 sec: 48430.0). Total num frames: 599400448. Throughput: 0: 12142.3. Samples: 37317596. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:21,168][03423] Avg episode reward: [(0, '57.751')] -[2024-07-05 15:47:21,660][03976] Updated weights for policy 0, policy_version 75615 (0.0008) -[2024-07-05 15:47:23,323][03976] Updated weights for policy 0, policy_version 75625 (0.0007) -[2024-07-05 15:47:24,984][03976] Updated weights for policy 0, policy_version 75635 (0.0007) -[2024-07-05 15:47:26,167][03423] Fps is (10 sec: 49153.0, 60 sec: 48742.4, 300 sec: 48485.6). Total num frames: 599654400. Throughput: 0: 12122.4. Samples: 37390820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:26,169][03423] Avg episode reward: [(0, '54.900')] -[2024-07-05 15:47:26,632][03976] Updated weights for policy 0, policy_version 75645 (0.0008) -[2024-07-05 15:47:28,283][03976] Updated weights for policy 0, policy_version 75655 (0.0007) -[2024-07-05 15:47:29,979][03976] Updated weights for policy 0, policy_version 75665 (0.0007) -[2024-07-05 15:47:31,168][03423] Fps is (10 sec: 49971.3, 60 sec: 48742.4, 300 sec: 48485.6). Total num frames: 599900160. Throughput: 0: 12123.6. Samples: 37464376. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) -[2024-07-05 15:47:31,169][03423] Avg episode reward: [(0, '56.118')] -[2024-07-05 15:47:31,677][03976] Updated weights for policy 0, policy_version 75675 (0.0007) -[2024-07-05 15:47:33,340][03976] Updated weights for policy 0, policy_version 75685 (0.0007) -[2024-07-05 15:47:33,504][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000075686_600014848.pth... -[2024-07-05 15:47:33,510][03956] Stopping Batcher_0... -[2024-07-05 15:47:33,510][03956] Loop batcher_evt_loop terminating... -[2024-07-05 15:47:33,519][03423] Component Batcher_0 stopped! -[2024-07-05 15:47:33,547][03423] Component RolloutWorker_w3 stopped! -[2024-07-05 15:47:33,550][03423] Component RolloutWorker_w10 stopped! -[2024-07-05 15:47:33,550][03986] Stopping RolloutWorker_w10... -[2024-07-05 15:47:33,552][03986] Loop rollout_proc10_evt_loop terminating... -[2024-07-05 15:47:33,552][04006] Stopping RolloutWorker_w14... -[2024-07-05 15:47:33,552][03981] Stopping RolloutWorker_w2... -[2024-07-05 15:47:33,552][03423] Component RolloutWorker_w1 stopped! -[2024-07-05 15:47:33,553][03981] Loop rollout_proc2_evt_loop terminating... -[2024-07-05 15:47:33,553][04006] Loop rollout_proc14_evt_loop terminating... -[2024-07-05 15:47:33,552][03978] Stopping RolloutWorker_w1... -[2024-07-05 15:47:33,553][03978] Loop rollout_proc1_evt_loop terminating... -[2024-07-05 15:47:33,553][03423] Component RolloutWorker_w14 stopped! -[2024-07-05 15:47:33,553][04007] Stopping RolloutWorker_w15... -[2024-07-05 15:47:33,554][04007] Loop rollout_proc15_evt_loop terminating... -[2024-07-05 15:47:33,554][03423] Component RolloutWorker_w13 stopped! -[2024-07-05 15:47:33,555][03423] Component RolloutWorker_w2 stopped! -[2024-07-05 15:47:33,555][03423] Component RolloutWorker_w11 stopped! -[2024-07-05 15:47:33,556][03423] Component RolloutWorker_w0 stopped! -[2024-07-05 15:47:33,557][03423] Component RolloutWorker_w15 stopped! -[2024-07-05 15:47:33,557][03984] Stopping RolloutWorker_w8... -[2024-07-05 15:47:33,550][03979] Stopping RolloutWorker_w3... -[2024-07-05 15:47:33,557][03423] Component RolloutWorker_w7 stopped! -[2024-07-05 15:47:33,557][03984] Loop rollout_proc8_evt_loop terminating... -[2024-07-05 15:47:33,558][03979] Loop rollout_proc3_evt_loop terminating... -[2024-07-05 15:47:33,557][03423] Component RolloutWorker_w5 stopped! -[2024-07-05 15:47:33,558][03423] Component RolloutWorker_w6 stopped! -[2024-07-05 15:47:33,559][03423] Component RolloutWorker_w8 stopped! -[2024-07-05 15:47:33,559][03423] Component RolloutWorker_w9 stopped! -[2024-07-05 15:47:33,555][03987] Stopping RolloutWorker_w11... -[2024-07-05 15:47:33,561][03987] Loop rollout_proc11_evt_loop terminating... -[2024-07-05 15:47:33,557][03985] Stopping RolloutWorker_w7... -[2024-07-05 15:47:33,562][03985] Loop rollout_proc7_evt_loop terminating... -[2024-07-05 15:47:33,556][03977] Stopping RolloutWorker_w0... -[2024-07-05 15:47:33,563][03977] Loop rollout_proc0_evt_loop terminating... -[2024-07-05 15:47:33,555][04004] Stopping RolloutWorker_w13... -[2024-07-05 15:47:33,560][03988] Stopping RolloutWorker_w9... -[2024-07-05 15:47:33,564][04004] Loop rollout_proc13_evt_loop terminating... -[2024-07-05 15:47:33,565][03988] Loop rollout_proc9_evt_loop terminating... -[2024-07-05 15:47:33,567][03980] Stopping RolloutWorker_w4... -[2024-07-05 15:47:33,568][03980] Loop rollout_proc4_evt_loop terminating... -[2024-07-05 15:47:33,567][03423] Component RolloutWorker_w4 stopped! -[2024-07-05 15:47:33,574][03956] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000074456_589938688.pth -[2024-07-05 15:47:33,558][03983] Stopping RolloutWorker_w5... -[2024-07-05 15:47:33,576][03983] Loop rollout_proc5_evt_loop terminating... -[2024-07-05 15:47:33,576][03956] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000075686_600014848.pth... -[2024-07-05 15:47:33,558][03982] Stopping RolloutWorker_w6... -[2024-07-05 15:47:33,580][03982] Loop rollout_proc6_evt_loop terminating... -[2024-07-05 15:47:33,581][03423] Component RolloutWorker_w12 stopped! -[2024-07-05 15:47:33,631][03976] Weights refcount: 2 0 -[2024-07-05 15:47:33,632][03976] Stopping InferenceWorker_p0-w0... -[2024-07-05 15:47:33,633][03976] Loop inference_proc0-0_evt_loop terminating... -[2024-07-05 15:47:33,633][03423] Component InferenceWorker_p0-w0 stopped! -[2024-07-05 15:47:33,584][04005] Stopping RolloutWorker_w12... -[2024-07-05 15:47:33,644][04005] Loop rollout_proc12_evt_loop terminating... -[2024-07-05 15:47:33,663][03956] Stopping LearnerWorker_p0... -[2024-07-05 15:47:33,664][03956] Loop learner_proc0_evt_loop terminating... -[2024-07-05 15:47:33,664][03423] Component LearnerWorker_p0 stopped! -[2024-07-05 15:47:33,665][03423] Waiting for process learner_proc0 to stop... -[2024-07-05 15:47:34,939][03423] Waiting for process inference_proc0-0 to join... -[2024-07-05 15:47:34,940][03423] Waiting for process rollout_proc0 to join... -[2024-07-05 15:47:34,940][03423] Waiting for process rollout_proc1 to join... -[2024-07-05 15:47:34,941][03423] Waiting for process rollout_proc2 to join... -[2024-07-05 15:47:34,941][03423] Waiting for process rollout_proc3 to join... -[2024-07-05 15:47:34,941][03423] Waiting for process rollout_proc4 to join... -[2024-07-05 15:47:34,941][03423] Waiting for process rollout_proc5 to join... -[2024-07-05 15:47:34,942][03423] Waiting for process rollout_proc6 to join... -[2024-07-05 15:47:34,942][03423] Waiting for process rollout_proc7 to join... -[2024-07-05 15:47:35,050][03423] Waiting for process rollout_proc8 to join... -[2024-07-05 15:47:35,051][03423] Waiting for process rollout_proc9 to join... -[2024-07-05 15:47:35,052][03423] Waiting for process rollout_proc10 to join... -[2024-07-05 15:47:35,052][03423] Waiting for process rollout_proc11 to join... -[2024-07-05 15:47:35,052][03423] Waiting for process rollout_proc12 to join... -[2024-07-05 15:47:35,053][03423] Waiting for process rollout_proc13 to join... -[2024-07-05 15:47:35,053][03423] Waiting for process rollout_proc14 to join... -[2024-07-05 15:47:35,053][03423] Waiting for process rollout_proc15 to join... -[2024-07-05 15:47:35,054][03423] Batcher 0 profile tree view: -batching: 226.9723, releasing_batches: 0.4062 -[2024-07-05 15:47:35,054][03423] InferenceWorker_p0-w0 profile tree view: +[2024-07-05 15:58:42,791][04600] Worker 5 uses CPU cores [10, 11] +[2024-07-05 15:58:42,808][04597] Worker 1 uses CPU cores [2, 3] +[2024-07-05 15:58:42,849][04594] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 15:58:42,849][04594] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-07-05 15:58:42,876][04581] Using optimizer +[2024-07-05 15:58:42,891][04594] Num visible devices: 1 +[2024-07-05 15:58:43,368][04581] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004884_20004864.pth... +[2024-07-05 15:58:43,439][04581] Loading model from checkpoint +[2024-07-05 15:58:43,440][04581] Loaded experiment state at self.train_step=4884, self.env_steps=20004864 +[2024-07-05 15:58:43,441][04581] Initialized policy 0 weights for model version 4884 +[2024-07-05 15:58:43,442][04581] LearnerWorker_p0 finished initialization! +[2024-07-05 15:58:43,442][04581] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-07-05 15:58:43,501][04594] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 15:58:43,503][04594] RunningMeanStd input shape: (1,) +[2024-07-05 15:58:43,510][04594] Num input channels: 3 +[2024-07-05 15:58:43,520][04594] Convolutional layer output size: 4608 +[2024-07-05 15:58:43,531][04594] Policy head output size: 512 +[2024-07-05 15:58:43,652][04005] Inference worker 0-0 is ready! +[2024-07-05 15:58:43,653][04005] All inference workers are ready! Signal rollout workers to start! +[2024-07-05 15:58:43,680][04602] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,680][04598] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,681][04600] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,681][04601] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,681][04599] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,681][04595] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,682][04597] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,682][04596] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 15:58:43,941][04005] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 20004864. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-07-05 15:58:44,254][04602] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,254][04601] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,254][04597] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,254][04600] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,254][04598] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,254][04595] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,402][04601] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,402][04595] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,402][04597] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,405][04602] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,414][04596] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,414][04599] Decorrelating experience for 0 frames... +[2024-07-05 15:58:44,562][04600] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,562][04598] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,597][04597] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,597][04595] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,597][04601] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,719][04596] Decorrelating experience for 32 frames... +[2024-07-05 15:58:44,752][04600] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,760][04595] Decorrelating experience for 96 frames... +[2024-07-05 15:58:44,760][04597] Decorrelating experience for 96 frames... +[2024-07-05 15:58:44,901][04602] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,901][04598] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,906][04596] Decorrelating experience for 64 frames... +[2024-07-05 15:58:44,922][04600] Decorrelating experience for 96 frames... +[2024-07-05 15:58:45,066][04602] Decorrelating experience for 96 frames... +[2024-07-05 15:58:45,067][04598] Decorrelating experience for 96 frames... +[2024-07-05 15:58:45,067][04601] Decorrelating experience for 96 frames... +[2024-07-05 15:58:45,073][04596] Decorrelating experience for 96 frames... +[2024-07-05 15:58:45,228][04599] Decorrelating experience for 32 frames... +[2024-07-05 15:58:45,421][04599] Decorrelating experience for 64 frames... +[2024-07-05 15:58:45,596][04599] Decorrelating experience for 96 frames... +[2024-07-05 15:58:46,023][04581] Signal inference workers to stop experience collection... +[2024-07-05 15:58:46,029][04594] InferenceWorker_p0-w0: stopping experience collection +[2024-07-05 15:58:48,865][04581] Signal inference workers to resume experience collection... +[2024-07-05 15:58:48,866][04594] InferenceWorker_p0-w0: resuming experience collection +[2024-07-05 15:58:48,942][04005] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 20008960. Throughput: 0: 580.4. Samples: 2902. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-07-05 15:58:48,943][04005] Avg episode reward: [(0, '2.016')] +[2024-07-05 15:58:52,050][04594] Updated weights for policy 0, policy_version 4894 (0.0102) +[2024-07-05 15:58:53,941][04005] Fps is (10 sec: 6144.0, 60 sec: 6144.0, 300 sec: 6144.0). Total num frames: 20066304. Throughput: 0: 1339.0. Samples: 13390. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-07-05 15:58:53,942][04005] Avg episode reward: [(0, '28.170')] +[2024-07-05 15:58:55,529][04594] Updated weights for policy 0, policy_version 4904 (0.0012) +[2024-07-05 15:58:58,941][04005] Fps is (10 sec: 11468.9, 60 sec: 7918.9, 300 sec: 7918.9). Total num frames: 20123648. Throughput: 0: 2075.2. Samples: 31128. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:58:58,942][04005] Avg episode reward: [(0, '31.070')] +[2024-07-05 15:58:58,981][04594] Updated weights for policy 0, policy_version 4914 (0.0012) +[2024-07-05 15:58:59,123][04005] Heartbeat connected on Batcher_0 +[2024-07-05 15:58:59,134][04005] Heartbeat connected on RolloutWorker_w0 +[2024-07-05 15:58:59,139][04005] Heartbeat connected on RolloutWorker_w1 +[2024-07-05 15:58:59,140][04005] Heartbeat connected on RolloutWorker_w2 +[2024-07-05 15:58:59,144][04005] Heartbeat connected on RolloutWorker_w3 +[2024-07-05 15:58:59,147][04005] Heartbeat connected on RolloutWorker_w4 +[2024-07-05 15:58:59,147][04005] Heartbeat connected on InferenceWorker_p0-w0 +[2024-07-05 15:58:59,150][04005] Heartbeat connected on RolloutWorker_w5 +[2024-07-05 15:58:59,153][04005] Heartbeat connected on RolloutWorker_w6 +[2024-07-05 15:58:59,155][04005] Heartbeat connected on RolloutWorker_w7 +[2024-07-05 15:58:59,332][04005] Heartbeat connected on LearnerWorker_p0 +[2024-07-05 15:59:02,435][04594] Updated weights for policy 0, policy_version 4924 (0.0012) +[2024-07-05 15:59:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 9011.2, 300 sec: 9011.2). Total num frames: 20185088. Throughput: 0: 2000.1. Samples: 40002. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:03,942][04005] Avg episode reward: [(0, '36.562')] +[2024-07-05 15:59:05,898][04594] Updated weights for policy 0, policy_version 4934 (0.0012) +[2024-07-05 15:59:08,942][04005] Fps is (10 sec: 11878.3, 60 sec: 9502.7, 300 sec: 9502.7). Total num frames: 20242432. Throughput: 0: 2303.9. Samples: 57598. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:08,943][04005] Avg episode reward: [(0, '36.161')] +[2024-07-05 15:59:09,433][04594] Updated weights for policy 0, policy_version 4944 (0.0012) +[2024-07-05 15:59:12,948][04594] Updated weights for policy 0, policy_version 4954 (0.0012) +[2024-07-05 15:59:13,941][04005] Fps is (10 sec: 11468.7, 60 sec: 9830.4, 300 sec: 9830.4). Total num frames: 20299776. Throughput: 0: 2500.6. Samples: 75018. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:13,942][04005] Avg episode reward: [(0, '36.971')] +[2024-07-05 15:59:16,445][04594] Updated weights for policy 0, policy_version 4964 (0.0012) +[2024-07-05 15:59:18,941][04005] Fps is (10 sec: 11878.6, 60 sec: 10181.5, 300 sec: 10181.5). Total num frames: 20361216. Throughput: 0: 2399.7. Samples: 83990. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:18,942][04005] Avg episode reward: [(0, '39.046')] +[2024-07-05 15:59:19,904][04594] Updated weights for policy 0, policy_version 4974 (0.0012) +[2024-07-05 15:59:23,368][04594] Updated weights for policy 0, policy_version 4984 (0.0011) +[2024-07-05 15:59:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 10342.4, 300 sec: 10342.4). Total num frames: 20418560. Throughput: 0: 2539.6. Samples: 101584. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:23,943][04005] Avg episode reward: [(0, '39.963')] +[2024-07-05 15:59:26,839][04594] Updated weights for policy 0, policy_version 4994 (0.0012) +[2024-07-05 15:59:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 10558.6, 300 sec: 10558.6). Total num frames: 20480000. Throughput: 0: 2657.6. Samples: 119590. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 15:59:28,942][04005] Avg episode reward: [(0, '40.570')] +[2024-07-05 15:59:30,322][04594] Updated weights for policy 0, policy_version 5004 (0.0011) +[2024-07-05 15:59:33,790][04594] Updated weights for policy 0, policy_version 5014 (0.0012) +[2024-07-05 15:59:33,942][04005] Fps is (10 sec: 11878.4, 60 sec: 10649.6, 300 sec: 10649.6). Total num frames: 20537344. Throughput: 0: 2783.6. Samples: 128164. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:33,943][04005] Avg episode reward: [(0, '38.056')] +[2024-07-05 15:59:37,260][04594] Updated weights for policy 0, policy_version 5024 (0.0011) +[2024-07-05 15:59:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 10724.1, 300 sec: 10724.1). Total num frames: 20594688. Throughput: 0: 2946.8. Samples: 145998. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 15:59:38,942][04005] Avg episode reward: [(0, '37.824')] +[2024-07-05 15:59:40,744][04594] Updated weights for policy 0, policy_version 5034 (0.0011) +[2024-07-05 15:59:43,942][04005] Fps is (10 sec: 11878.4, 60 sec: 10854.4, 300 sec: 10854.4). Total num frames: 20656128. Throughput: 0: 2946.5. Samples: 163720. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 15:59:43,942][04005] Avg episode reward: [(0, '36.803')] +[2024-07-05 15:59:44,218][04594] Updated weights for policy 0, policy_version 5044 (0.0012) +[2024-07-05 15:59:47,729][04594] Updated weights for policy 0, policy_version 5054 (0.0011) +[2024-07-05 15:59:48,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 10901.6). Total num frames: 20713472. Throughput: 0: 2938.9. Samples: 172254. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 15:59:48,942][04005] Avg episode reward: [(0, '38.379')] +[2024-07-05 15:59:51,202][04594] Updated weights for policy 0, policy_version 5064 (0.0011) +[2024-07-05 15:59:53,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 10942.2). Total num frames: 20770816. Throughput: 0: 2945.4. Samples: 190142. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 15:59:53,943][04005] Avg episode reward: [(0, '36.251')] +[2024-07-05 15:59:54,677][04594] Updated weights for policy 0, policy_version 5074 (0.0012) +[2024-07-05 15:59:58,159][04594] Updated weights for policy 0, policy_version 5084 (0.0011) +[2024-07-05 15:59:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11031.9). Total num frames: 20832256. Throughput: 0: 2951.2. Samples: 207820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 15:59:58,942][04005] Avg episode reward: [(0, '34.416')] +[2024-07-05 16:00:01,637][04594] Updated weights for policy 0, policy_version 5094 (0.0012) +[2024-07-05 16:00:03,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11059.2). Total num frames: 20889600. Throughput: 0: 2943.8. Samples: 216462. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 16:00:03,943][04005] Avg episode reward: [(0, '35.229')] +[2024-07-05 16:00:05,119][04594] Updated weights for policy 0, policy_version 5104 (0.0012) +[2024-07-05 16:00:08,628][04594] Updated weights for policy 0, policy_version 5114 (0.0012) +[2024-07-05 16:00:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11083.3). Total num frames: 20946944. Throughput: 0: 2948.4. Samples: 234262. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 16:00:08,943][04005] Avg episode reward: [(0, '37.528')] +[2024-07-05 16:00:12,118][04594] Updated weights for policy 0, policy_version 5124 (0.0011) +[2024-07-05 16:00:13,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11150.2). Total num frames: 21008384. Throughput: 0: 2938.4. Samples: 251820. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 16:00:13,943][04005] Avg episode reward: [(0, '38.785')] +[2024-07-05 16:00:15,607][04594] Updated weights for policy 0, policy_version 5134 (0.0012) +[2024-07-05 16:00:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11167.0). Total num frames: 21065728. Throughput: 0: 2938.2. Samples: 260384. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 16:00:18,942][04005] Avg episode reward: [(0, '36.914')] +[2024-07-05 16:00:19,099][04594] Updated weights for policy 0, policy_version 5144 (0.0012) +[2024-07-05 16:00:22,588][04594] Updated weights for policy 0, policy_version 5154 (0.0011) +[2024-07-05 16:00:23,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11741.9, 300 sec: 11182.1). Total num frames: 21123072. Throughput: 0: 2937.6. Samples: 278188. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 16:00:23,942][04005] Avg episode reward: [(0, '35.316')] +[2024-07-05 16:00:26,076][04594] Updated weights for policy 0, policy_version 5164 (0.0012) +[2024-07-05 16:00:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11234.8). Total num frames: 21184512. Throughput: 0: 2934.7. Samples: 295780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 16:00:28,942][04005] Avg episode reward: [(0, '33.375')] +[2024-07-05 16:00:29,573][04594] Updated weights for policy 0, policy_version 5174 (0.0011) +[2024-07-05 16:00:33,064][04594] Updated weights for policy 0, policy_version 5184 (0.0011) +[2024-07-05 16:00:33,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11245.4). Total num frames: 21241856. Throughput: 0: 2934.5. Samples: 304306. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 16:00:33,942][04005] Avg episode reward: [(0, '36.148')] +[2024-07-05 16:00:34,116][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000005187_21245952.pth... +[2024-07-05 16:00:34,191][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004807_19689472.pth +[2024-07-05 16:00:36,597][04594] Updated weights for policy 0, policy_version 5194 (0.0011) +[2024-07-05 16:00:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11255.1). Total num frames: 21299200. Throughput: 0: 2925.5. Samples: 321790. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2024-07-05 16:00:38,942][04005] Avg episode reward: [(0, '35.679')] +[2024-07-05 16:00:40,107][04594] Updated weights for policy 0, policy_version 5204 (0.0012) +[2024-07-05 16:00:43,598][04594] Updated weights for policy 0, policy_version 5214 (0.0011) +[2024-07-05 16:00:43,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11673.6, 300 sec: 11264.0). Total num frames: 21356544. Throughput: 0: 2923.9. Samples: 339398. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2024-07-05 16:00:43,943][04005] Avg episode reward: [(0, '38.941')] +[2024-07-05 16:00:47,097][04594] Updated weights for policy 0, policy_version 5224 (0.0012) +[2024-07-05 16:00:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11305.0). Total num frames: 21417984. Throughput: 0: 2926.1. Samples: 348138. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:00:48,942][04005] Avg episode reward: [(0, '38.210')] +[2024-07-05 16:00:50,591][04594] Updated weights for policy 0, policy_version 5234 (0.0012) +[2024-07-05 16:00:53,942][04005] Fps is (10 sec: 11878.7, 60 sec: 11741.9, 300 sec: 11311.3). Total num frames: 21475328. Throughput: 0: 2918.9. Samples: 365614. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:00:53,942][04005] Avg episode reward: [(0, '37.468')] +[2024-07-05 16:00:54,085][04594] Updated weights for policy 0, policy_version 5244 (0.0012) +[2024-07-05 16:00:57,592][04594] Updated weights for policy 0, policy_version 5254 (0.0011) +[2024-07-05 16:00:58,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11317.1). Total num frames: 21532672. Throughput: 0: 2920.1. Samples: 383222. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:00:58,942][04005] Avg episode reward: [(0, '37.018')] +[2024-07-05 16:01:01,076][04594] Updated weights for policy 0, policy_version 5264 (0.0011) +[2024-07-05 16:01:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11351.8). Total num frames: 21594112. Throughput: 0: 2927.9. Samples: 392138. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:03,942][04005] Avg episode reward: [(0, '39.217')] +[2024-07-05 16:01:04,574][04594] Updated weights for policy 0, policy_version 5274 (0.0011) +[2024-07-05 16:01:08,064][04594] Updated weights for policy 0, policy_version 5284 (0.0012) +[2024-07-05 16:01:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11355.8). Total num frames: 21651456. Throughput: 0: 2920.6. Samples: 409616. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:08,943][04005] Avg episode reward: [(0, '38.099')] +[2024-07-05 16:01:11,564][04594] Updated weights for policy 0, policy_version 5294 (0.0012) +[2024-07-05 16:01:13,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11359.6). Total num frames: 21708800. Throughput: 0: 2919.3. Samples: 427148. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:13,943][04005] Avg episode reward: [(0, '38.902')] +[2024-07-05 16:01:15,051][04594] Updated weights for policy 0, policy_version 5304 (0.0011) +[2024-07-05 16:01:18,536][04594] Updated weights for policy 0, policy_version 5314 (0.0011) +[2024-07-05 16:01:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11389.5). Total num frames: 21770240. Throughput: 0: 2929.9. Samples: 436150. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:18,942][04005] Avg episode reward: [(0, '37.198')] +[2024-07-05 16:01:22,040][04594] Updated weights for policy 0, policy_version 5324 (0.0012) +[2024-07-05 16:01:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11392.0). Total num frames: 21827584. Throughput: 0: 2929.9. Samples: 453638. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:23,943][04005] Avg episode reward: [(0, '35.189')] +[2024-07-05 16:01:25,536][04594] Updated weights for policy 0, policy_version 5334 (0.0012) +[2024-07-05 16:01:28,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11394.3). Total num frames: 21884928. Throughput: 0: 2927.5. Samples: 471134. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:28,942][04005] Avg episode reward: [(0, '36.917')] +[2024-07-05 16:01:29,033][04594] Updated weights for policy 0, policy_version 5344 (0.0012) +[2024-07-05 16:01:32,538][04594] Updated weights for policy 0, policy_version 5354 (0.0011) +[2024-07-05 16:01:33,941][04005] Fps is (10 sec: 11878.7, 60 sec: 11741.9, 300 sec: 11420.6). Total num frames: 21946368. Throughput: 0: 2932.4. Samples: 480096. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:33,942][04005] Avg episode reward: [(0, '36.823')] +[2024-07-05 16:01:36,057][04594] Updated weights for policy 0, policy_version 5364 (0.0011) +[2024-07-05 16:01:38,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11422.0). Total num frames: 22003712. Throughput: 0: 2931.7. Samples: 497540. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:38,942][04005] Avg episode reward: [(0, '35.043')] +[2024-07-05 16:01:39,567][04594] Updated weights for policy 0, policy_version 5374 (0.0012) +[2024-07-05 16:01:43,080][04594] Updated weights for policy 0, policy_version 5384 (0.0012) +[2024-07-05 16:01:43,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11741.9, 300 sec: 11423.3). Total num frames: 22061056. Throughput: 0: 2928.9. Samples: 515022. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:43,943][04005] Avg episode reward: [(0, '36.434')] +[2024-07-05 16:01:46,605][04594] Updated weights for policy 0, policy_version 5394 (0.0012) +[2024-07-05 16:01:48,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11424.5). Total num frames: 22118400. Throughput: 0: 2923.9. Samples: 523712. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:48,942][04005] Avg episode reward: [(0, '34.980')] +[2024-07-05 16:01:50,164][04594] Updated weights for policy 0, policy_version 5404 (0.0012) +[2024-07-05 16:01:53,709][04594] Updated weights for policy 0, policy_version 5414 (0.0011) +[2024-07-05 16:01:53,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11673.6, 300 sec: 11425.7). Total num frames: 22175744. Throughput: 0: 2915.2. Samples: 540800. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:53,942][04005] Avg episode reward: [(0, '35.104')] +[2024-07-05 16:01:57,212][04594] Updated weights for policy 0, policy_version 5424 (0.0012) +[2024-07-05 16:01:58,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11426.8). Total num frames: 22233088. Throughput: 0: 2918.5. Samples: 558480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:01:58,943][04005] Avg episode reward: [(0, '34.921')] +[2024-07-05 16:02:00,711][04594] Updated weights for policy 0, policy_version 5434 (0.0012) +[2024-07-05 16:02:03,942][04005] Fps is (10 sec: 11878.0, 60 sec: 11673.5, 300 sec: 11448.3). Total num frames: 22294528. Throughput: 0: 2913.1. Samples: 567240. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:03,943][04005] Avg episode reward: [(0, '37.775')] +[2024-07-05 16:02:04,208][04594] Updated weights for policy 0, policy_version 5444 (0.0011) +[2024-07-05 16:02:07,729][04594] Updated weights for policy 0, policy_version 5454 (0.0012) +[2024-07-05 16:02:08,942][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11448.8). Total num frames: 22351872. Throughput: 0: 2912.4. Samples: 584698. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:08,943][04005] Avg episode reward: [(0, '38.988')] +[2024-07-05 16:02:11,239][04594] Updated weights for policy 0, policy_version 5464 (0.0012) +[2024-07-05 16:02:13,941][04005] Fps is (10 sec: 11469.1, 60 sec: 11673.6, 300 sec: 11449.3). Total num frames: 22409216. Throughput: 0: 2911.2. Samples: 602140. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:13,942][04005] Avg episode reward: [(0, '37.844')] +[2024-07-05 16:02:14,746][04594] Updated weights for policy 0, policy_version 5474 (0.0012) +[2024-07-05 16:02:18,250][04594] Updated weights for policy 0, policy_version 5484 (0.0012) +[2024-07-05 16:02:18,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11449.8). Total num frames: 22466560. Throughput: 0: 2910.7. Samples: 611080. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:18,942][04005] Avg episode reward: [(0, '37.513')] +[2024-07-05 16:02:21,757][04594] Updated weights for policy 0, policy_version 5494 (0.0011) +[2024-07-05 16:02:23,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11468.8). Total num frames: 22528000. Throughput: 0: 2912.4. Samples: 628596. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:23,942][04005] Avg episode reward: [(0, '37.673')] +[2024-07-05 16:02:25,261][04594] Updated weights for policy 0, policy_version 5504 (0.0012) +[2024-07-05 16:02:28,747][04594] Updated weights for policy 0, policy_version 5514 (0.0012) +[2024-07-05 16:02:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11468.8). Total num frames: 22585344. Throughput: 0: 2912.9. Samples: 646104. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:28,942][04005] Avg episode reward: [(0, '38.139')] +[2024-07-05 16:02:32,243][04594] Updated weights for policy 0, policy_version 5524 (0.0011) +[2024-07-05 16:02:33,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11468.8). Total num frames: 22642688. Throughput: 0: 2918.9. Samples: 655064. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:33,942][04005] Avg episode reward: [(0, '36.187')] +[2024-07-05 16:02:33,988][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000005529_22646784.pth... +[2024-07-05 16:02:34,060][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000004884_20004864.pth +[2024-07-05 16:02:35,758][04594] Updated weights for policy 0, policy_version 5534 (0.0012) +[2024-07-05 16:02:38,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11486.2). Total num frames: 22704128. Throughput: 0: 2928.1. Samples: 672564. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:38,942][04005] Avg episode reward: [(0, '35.902')] +[2024-07-05 16:02:39,272][04594] Updated weights for policy 0, policy_version 5544 (0.0011) +[2024-07-05 16:02:42,781][04594] Updated weights for policy 0, policy_version 5554 (0.0012) +[2024-07-05 16:02:43,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11485.9). Total num frames: 22761472. Throughput: 0: 2922.9. Samples: 690012. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:43,942][04005] Avg episode reward: [(0, '36.412')] +[2024-07-05 16:02:46,287][04594] Updated weights for policy 0, policy_version 5564 (0.0012) +[2024-07-05 16:02:48,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11485.5). Total num frames: 22818816. Throughput: 0: 2919.7. Samples: 698624. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:48,942][04005] Avg episode reward: [(0, '39.078')] +[2024-07-05 16:02:49,783][04594] Updated weights for policy 0, policy_version 5574 (0.0011) +[2024-07-05 16:02:53,276][04594] Updated weights for policy 0, policy_version 5584 (0.0011) +[2024-07-05 16:02:53,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11485.2). Total num frames: 22876160. Throughput: 0: 2927.6. Samples: 716440. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:53,942][04005] Avg episode reward: [(0, '39.994')] +[2024-07-05 16:02:56,778][04594] Updated weights for policy 0, policy_version 5594 (0.0011) +[2024-07-05 16:02:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11500.9). Total num frames: 22937600. Throughput: 0: 2928.7. Samples: 733932. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:02:58,942][04005] Avg episode reward: [(0, '39.747')] +[2024-07-05 16:03:00,285][04594] Updated weights for policy 0, policy_version 5604 (0.0012) +[2024-07-05 16:03:03,779][04594] Updated weights for policy 0, policy_version 5614 (0.0012) +[2024-07-05 16:03:03,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11500.3). Total num frames: 22994944. Throughput: 0: 2919.5. Samples: 742456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:03:03,942][04005] Avg episode reward: [(0, '40.073')] +[2024-07-05 16:03:07,291][04594] Updated weights for policy 0, policy_version 5624 (0.0012) +[2024-07-05 16:03:08,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11499.7). Total num frames: 23052288. Throughput: 0: 2920.2. Samples: 760004. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:03:08,943][04005] Avg episode reward: [(0, '39.329')] +[2024-07-05 16:03:10,802][04594] Updated weights for policy 0, policy_version 5634 (0.0011) +[2024-07-05 16:03:13,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11499.1). Total num frames: 23109632. Throughput: 0: 2922.3. Samples: 777608. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:13,943][04005] Avg episode reward: [(0, '38.912')] +[2024-07-05 16:03:14,303][04594] Updated weights for policy 0, policy_version 5644 (0.0011) +[2024-07-05 16:03:17,809][04594] Updated weights for policy 0, policy_version 5654 (0.0012) +[2024-07-05 16:03:18,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11513.5). Total num frames: 23171072. Throughput: 0: 2916.2. Samples: 786294. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:18,942][04005] Avg episode reward: [(0, '38.858')] +[2024-07-05 16:03:21,322][04594] Updated weights for policy 0, policy_version 5664 (0.0012) +[2024-07-05 16:03:23,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11512.7). Total num frames: 23228416. Throughput: 0: 2915.8. Samples: 803776. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:23,942][04005] Avg episode reward: [(0, '39.360')] +[2024-07-05 16:03:24,812][04594] Updated weights for policy 0, policy_version 5674 (0.0012) +[2024-07-05 16:03:28,309][04594] Updated weights for policy 0, policy_version 5684 (0.0012) +[2024-07-05 16:03:28,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11511.9). Total num frames: 23285760. Throughput: 0: 2916.7. Samples: 821262. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:28,942][04005] Avg episode reward: [(0, '39.606')] +[2024-07-05 16:03:31,805][04594] Updated weights for policy 0, policy_version 5694 (0.0012) +[2024-07-05 16:03:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11525.3). Total num frames: 23347200. Throughput: 0: 2924.8. Samples: 830242. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:33,942][04005] Avg episode reward: [(0, '37.605')] +[2024-07-05 16:03:35,326][04594] Updated weights for policy 0, policy_version 5704 (0.0012) +[2024-07-05 16:03:38,833][04594] Updated weights for policy 0, policy_version 5714 (0.0011) +[2024-07-05 16:03:38,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11524.3). Total num frames: 23404544. Throughput: 0: 2916.4. Samples: 847676. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:38,942][04005] Avg episode reward: [(0, '37.575')] +[2024-07-05 16:03:42,337][04594] Updated weights for policy 0, policy_version 5724 (0.0012) +[2024-07-05 16:03:43,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 23461888. Throughput: 0: 2916.6. Samples: 865180. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:43,942][04005] Avg episode reward: [(0, '38.607')] +[2024-07-05 16:03:45,834][04594] Updated weights for policy 0, policy_version 5734 (0.0011) +[2024-07-05 16:03:48,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 23519232. Throughput: 0: 2926.0. Samples: 874128. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:48,943][04005] Avg episode reward: [(0, '37.394')] +[2024-07-05 16:03:49,344][04594] Updated weights for policy 0, policy_version 5744 (0.0011) +[2024-07-05 16:03:52,832][04594] Updated weights for policy 0, policy_version 5754 (0.0012) +[2024-07-05 16:03:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 23580672. Throughput: 0: 2925.6. Samples: 891656. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:03:53,943][04005] Avg episode reward: [(0, '34.938')] +[2024-07-05 16:03:56,344][04594] Updated weights for policy 0, policy_version 5764 (0.0011) +[2024-07-05 16:03:58,941][04005] Fps is (10 sec: 11878.7, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 23638016. Throughput: 0: 2923.2. Samples: 909152. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:03:58,942][04005] Avg episode reward: [(0, '33.847')] +[2024-07-05 16:03:59,837][04594] Updated weights for policy 0, policy_version 5774 (0.0011) +[2024-07-05 16:04:03,327][04594] Updated weights for policy 0, policy_version 5784 (0.0012) +[2024-07-05 16:04:03,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 23695360. Throughput: 0: 2927.8. Samples: 918044. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:04:03,943][04005] Avg episode reward: [(0, '36.956')] +[2024-07-05 16:04:06,826][04594] Updated weights for policy 0, policy_version 5794 (0.0012) +[2024-07-05 16:04:08,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 23756800. Throughput: 0: 2930.4. Samples: 935646. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:08,942][04005] Avg episode reward: [(0, '37.428')] +[2024-07-05 16:04:10,345][04594] Updated weights for policy 0, policy_version 5804 (0.0012) +[2024-07-05 16:04:13,850][04594] Updated weights for policy 0, policy_version 5814 (0.0012) +[2024-07-05 16:04:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11704.8). Total num frames: 23814144. Throughput: 0: 2928.9. Samples: 953064. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:13,942][04005] Avg episode reward: [(0, '38.566')] +[2024-07-05 16:04:17,364][04594] Updated weights for policy 0, policy_version 5824 (0.0012) +[2024-07-05 16:04:18,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 23871488. Throughput: 0: 2918.1. Samples: 961558. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:18,942][04005] Avg episode reward: [(0, '39.139')] +[2024-07-05 16:04:20,869][04594] Updated weights for policy 0, policy_version 5834 (0.0012) +[2024-07-05 16:04:23,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 23928832. Throughput: 0: 2923.1. Samples: 979216. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:23,942][04005] Avg episode reward: [(0, '39.102')] +[2024-07-05 16:04:24,378][04594] Updated weights for policy 0, policy_version 5844 (0.0012) +[2024-07-05 16:04:27,876][04594] Updated weights for policy 0, policy_version 5854 (0.0012) +[2024-07-05 16:04:28,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11704.8). Total num frames: 23990272. Throughput: 0: 2927.2. Samples: 996904. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:04:28,942][04005] Avg episode reward: [(0, '35.748')] +[2024-07-05 16:04:31,391][04594] Updated weights for policy 0, policy_version 5864 (0.0012) +[2024-07-05 16:04:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11704.8). Total num frames: 24047616. Throughput: 0: 2918.4. Samples: 1005456. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:04:33,942][04005] Avg episode reward: [(0, '36.600')] +[2024-07-05 16:04:34,180][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000005872_24051712.pth... +[2024-07-05 16:04:34,252][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000005187_21245952.pth +[2024-07-05 16:04:34,893][04594] Updated weights for policy 0, policy_version 5874 (0.0011) +[2024-07-05 16:04:38,387][04594] Updated weights for policy 0, policy_version 5884 (0.0012) +[2024-07-05 16:04:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24104960. Throughput: 0: 2917.9. Samples: 1022960. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:04:38,943][04005] Avg episode reward: [(0, '35.120')] +[2024-07-05 16:04:41,908][04594] Updated weights for policy 0, policy_version 5894 (0.0012) +[2024-07-05 16:04:43,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24162304. Throughput: 0: 2917.0. Samples: 1040416. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:43,942][04005] Avg episode reward: [(0, '37.577')] +[2024-07-05 16:04:45,409][04594] Updated weights for policy 0, policy_version 5904 (0.0012) +[2024-07-05 16:04:48,928][04594] Updated weights for policy 0, policy_version 5914 (0.0012) +[2024-07-05 16:04:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11704.9). Total num frames: 24223744. Throughput: 0: 2918.6. Samples: 1049380. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:48,942][04005] Avg episode reward: [(0, '39.249')] +[2024-07-05 16:04:52,435][04594] Updated weights for policy 0, policy_version 5924 (0.0012) +[2024-07-05 16:04:53,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24281088. Throughput: 0: 2916.0. Samples: 1066866. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:04:53,942][04005] Avg episode reward: [(0, '41.215')] +[2024-07-05 16:04:54,182][04581] Saving new best policy, reward=41.215! +[2024-07-05 16:04:55,948][04594] Updated weights for policy 0, policy_version 5934 (0.0012) +[2024-07-05 16:04:58,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24338432. Throughput: 0: 2917.2. Samples: 1084338. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:04:58,942][04005] Avg episode reward: [(0, '38.113')] +[2024-07-05 16:04:59,449][04594] Updated weights for policy 0, policy_version 5944 (0.0012) +[2024-07-05 16:05:02,963][04594] Updated weights for policy 0, policy_version 5954 (0.0012) +[2024-07-05 16:05:03,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24395776. Throughput: 0: 2926.9. Samples: 1093270. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:03,942][04005] Avg episode reward: [(0, '36.393')] +[2024-07-05 16:05:06,475][04594] Updated weights for policy 0, policy_version 5964 (0.0012) +[2024-07-05 16:05:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24457216. Throughput: 0: 2923.0. Samples: 1110750. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:05:08,942][04005] Avg episode reward: [(0, '34.817')] +[2024-07-05 16:05:10,003][04594] Updated weights for policy 0, policy_version 5974 (0.0011) +[2024-07-05 16:05:13,497][04594] Updated weights for policy 0, policy_version 5984 (0.0011) +[2024-07-05 16:05:13,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24514560. Throughput: 0: 2916.9. Samples: 1128166. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:13,942][04005] Avg episode reward: [(0, '35.833')] +[2024-07-05 16:05:17,002][04594] Updated weights for policy 0, policy_version 5994 (0.0011) +[2024-07-05 16:05:18,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24571904. Throughput: 0: 2916.9. Samples: 1136718. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:18,942][04005] Avg episode reward: [(0, '36.809')] +[2024-07-05 16:05:20,516][04594] Updated weights for policy 0, policy_version 6004 (0.0012) +[2024-07-05 16:05:23,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 24629248. Throughput: 0: 2920.1. Samples: 1154366. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:23,942][04005] Avg episode reward: [(0, '35.293')] +[2024-07-05 16:05:24,013][04594] Updated weights for policy 0, policy_version 6014 (0.0012) +[2024-07-05 16:05:27,509][04594] Updated weights for policy 0, policy_version 6024 (0.0012) +[2024-07-05 16:05:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24690688. Throughput: 0: 2926.6. Samples: 1172112. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:05:28,942][04005] Avg episode reward: [(0, '38.152')] +[2024-07-05 16:05:31,034][04594] Updated weights for policy 0, policy_version 6034 (0.0011) +[2024-07-05 16:05:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24748032. Throughput: 0: 2916.7. Samples: 1180632. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:33,942][04005] Avg episode reward: [(0, '38.182')] +[2024-07-05 16:05:34,542][04594] Updated weights for policy 0, policy_version 6044 (0.0011) +[2024-07-05 16:05:38,052][04594] Updated weights for policy 0, policy_version 6054 (0.0012) +[2024-07-05 16:05:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24805376. Throughput: 0: 2915.3. Samples: 1198056. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:05:38,942][04005] Avg episode reward: [(0, '38.365')] +[2024-07-05 16:05:41,560][04594] Updated weights for policy 0, policy_version 6064 (0.0012) +[2024-07-05 16:05:43,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 24862720. Throughput: 0: 2914.9. Samples: 1215510. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:43,942][04005] Avg episode reward: [(0, '37.063')] +[2024-07-05 16:05:45,077][04594] Updated weights for policy 0, policy_version 6074 (0.0013) +[2024-07-05 16:05:48,577][04594] Updated weights for policy 0, policy_version 6084 (0.0011) +[2024-07-05 16:05:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24924160. Throughput: 0: 2915.2. Samples: 1224456. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:48,942][04005] Avg episode reward: [(0, '38.329')] +[2024-07-05 16:05:52,084][04594] Updated weights for policy 0, policy_version 6094 (0.0012) +[2024-07-05 16:05:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 24981504. Throughput: 0: 2915.1. Samples: 1241930. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:53,942][04005] Avg episode reward: [(0, '37.814')] +[2024-07-05 16:05:55,596][04594] Updated weights for policy 0, policy_version 6104 (0.0012) +[2024-07-05 16:05:58,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25038848. Throughput: 0: 2916.0. Samples: 1259386. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:05:58,943][04005] Avg episode reward: [(0, '37.550')] +[2024-07-05 16:05:59,115][04594] Updated weights for policy 0, policy_version 6114 (0.0012) +[2024-07-05 16:06:02,643][04594] Updated weights for policy 0, policy_version 6124 (0.0011) +[2024-07-05 16:06:03,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25096192. Throughput: 0: 2920.9. Samples: 1268160. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:06:03,942][04005] Avg episode reward: [(0, '38.212')] +[2024-07-05 16:06:06,139][04594] Updated weights for policy 0, policy_version 6134 (0.0012) +[2024-07-05 16:06:08,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 25153536. Throughput: 0: 2918.8. Samples: 1285710. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:06:08,942][04005] Avg episode reward: [(0, '39.495')] +[2024-07-05 16:06:09,678][04594] Updated weights for policy 0, policy_version 6144 (0.0012) +[2024-07-05 16:06:13,188][04594] Updated weights for policy 0, policy_version 6154 (0.0012) +[2024-07-05 16:06:13,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25214976. Throughput: 0: 2912.0. Samples: 1303150. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:06:13,942][04005] Avg episode reward: [(0, '41.898')] +[2024-07-05 16:06:13,945][04581] Saving new best policy, reward=41.898! +[2024-07-05 16:06:16,703][04594] Updated weights for policy 0, policy_version 6164 (0.0012) +[2024-07-05 16:06:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25272320. Throughput: 0: 2912.1. Samples: 1311678. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:06:18,942][04005] Avg episode reward: [(0, '41.385')] +[2024-07-05 16:06:20,203][04594] Updated weights for policy 0, policy_version 6174 (0.0012) +[2024-07-05 16:06:23,713][04594] Updated weights for policy 0, policy_version 6184 (0.0012) +[2024-07-05 16:06:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25329664. Throughput: 0: 2912.0. Samples: 1329098. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:06:23,942][04005] Avg episode reward: [(0, '38.714')] +[2024-07-05 16:06:27,216][04594] Updated weights for policy 0, policy_version 6194 (0.0012) +[2024-07-05 16:06:28,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 25387008. Throughput: 0: 2916.3. Samples: 1346742. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:06:28,942][04005] Avg episode reward: [(0, '35.164')] +[2024-07-05 16:06:30,733][04594] Updated weights for policy 0, policy_version 6204 (0.0012) +[2024-07-05 16:06:33,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25448448. Throughput: 0: 2912.3. Samples: 1355508. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:06:33,943][04005] Avg episode reward: [(0, '33.322')] +[2024-07-05 16:06:33,946][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000006213_25448448.pth... +[2024-07-05 16:06:34,018][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000005529_22646784.pth +[2024-07-05 16:06:34,289][04594] Updated weights for policy 0, policy_version 6214 (0.0012) +[2024-07-05 16:06:37,758][04594] Updated weights for policy 0, policy_version 6224 (0.0013) +[2024-07-05 16:06:38,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25505792. Throughput: 0: 2911.7. Samples: 1372958. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:06:38,942][04005] Avg episode reward: [(0, '34.852')] +[2024-07-05 16:06:41,276][04594] Updated weights for policy 0, policy_version 6234 (0.0012) +[2024-07-05 16:06:43,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25563136. Throughput: 0: 2911.9. Samples: 1390422. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:06:43,943][04005] Avg episode reward: [(0, '38.761')] +[2024-07-05 16:06:44,784][04594] Updated weights for policy 0, policy_version 6244 (0.0012) +[2024-07-05 16:06:48,296][04594] Updated weights for policy 0, policy_version 6254 (0.0011) +[2024-07-05 16:06:48,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 25620480. Throughput: 0: 2915.3. Samples: 1399350. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:06:48,942][04005] Avg episode reward: [(0, '39.320')] +[2024-07-05 16:06:51,802][04594] Updated weights for policy 0, policy_version 6264 (0.0011) +[2024-07-05 16:06:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11691.0). Total num frames: 25681920. Throughput: 0: 2913.0. Samples: 1416796. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:06:53,942][04005] Avg episode reward: [(0, '39.177')] +[2024-07-05 16:06:55,313][04594] Updated weights for policy 0, policy_version 6274 (0.0012) +[2024-07-05 16:06:58,832][04594] Updated weights for policy 0, policy_version 6284 (0.0012) +[2024-07-05 16:06:58,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25739264. Throughput: 0: 2914.3. Samples: 1434294. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:06:58,943][04005] Avg episode reward: [(0, '36.483')] +[2024-07-05 16:07:02,347][04594] Updated weights for policy 0, policy_version 6294 (0.0012) +[2024-07-05 16:07:03,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25796608. Throughput: 0: 2914.6. Samples: 1442834. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:03,942][04005] Avg episode reward: [(0, '38.030')] +[2024-07-05 16:07:05,882][04594] Updated weights for policy 0, policy_version 6304 (0.0012) +[2024-07-05 16:07:08,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25853952. Throughput: 0: 2917.4. Samples: 1460380. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:08,942][04005] Avg episode reward: [(0, '39.230')] +[2024-07-05 16:07:09,391][04594] Updated weights for policy 0, policy_version 6314 (0.0012) +[2024-07-05 16:07:12,901][04594] Updated weights for policy 0, policy_version 6324 (0.0012) +[2024-07-05 16:07:13,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11677.1). Total num frames: 25911296. Throughput: 0: 2917.5. Samples: 1478030. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:13,942][04005] Avg episode reward: [(0, '39.288')] +[2024-07-05 16:07:16,409][04594] Updated weights for policy 0, policy_version 6334 (0.0012) +[2024-07-05 16:07:18,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 25972736. Throughput: 0: 2915.9. Samples: 1486722. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:18,942][04005] Avg episode reward: [(0, '39.710')] +[2024-07-05 16:07:19,916][04594] Updated weights for policy 0, policy_version 6344 (0.0012) +[2024-07-05 16:07:23,425][04594] Updated weights for policy 0, policy_version 6354 (0.0012) +[2024-07-05 16:07:23,942][04005] Fps is (10 sec: 11878.0, 60 sec: 11673.5, 300 sec: 11677.1). Total num frames: 26030080. Throughput: 0: 2916.6. Samples: 1504206. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:23,943][04005] Avg episode reward: [(0, '36.320')] +[2024-07-05 16:07:26,936][04594] Updated weights for policy 0, policy_version 6364 (0.0013) +[2024-07-05 16:07:28,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26087424. Throughput: 0: 2915.7. Samples: 1521630. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:28,942][04005] Avg episode reward: [(0, '37.512')] +[2024-07-05 16:07:30,452][04594] Updated weights for policy 0, policy_version 6374 (0.0012) +[2024-07-05 16:07:33,941][04005] Fps is (10 sec: 11469.2, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 26144768. Throughput: 0: 2916.8. Samples: 1530608. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:07:33,942][04005] Avg episode reward: [(0, '38.490')] +[2024-07-05 16:07:33,951][04594] Updated weights for policy 0, policy_version 6384 (0.0012) +[2024-07-05 16:07:37,461][04594] Updated weights for policy 0, policy_version 6394 (0.0012) +[2024-07-05 16:07:38,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26206208. Throughput: 0: 2917.3. Samples: 1548076. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:07:38,942][04005] Avg episode reward: [(0, '39.602')] +[2024-07-05 16:07:40,980][04594] Updated weights for policy 0, policy_version 6404 (0.0012) +[2024-07-05 16:07:43,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26263552. Throughput: 0: 2917.0. Samples: 1565558. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:07:43,943][04005] Avg episode reward: [(0, '39.232')] +[2024-07-05 16:07:44,494][04594] Updated weights for policy 0, policy_version 6414 (0.0011) +[2024-07-05 16:07:48,011][04594] Updated weights for policy 0, policy_version 6424 (0.0012) +[2024-07-05 16:07:48,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26320896. Throughput: 0: 2921.0. Samples: 1574278. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:07:48,943][04005] Avg episode reward: [(0, '39.272')] +[2024-07-05 16:07:51,515][04594] Updated weights for policy 0, policy_version 6434 (0.0012) +[2024-07-05 16:07:53,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 26378240. Throughput: 0: 2924.0. Samples: 1591960. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:07:53,943][04005] Avg episode reward: [(0, '39.161')] +[2024-07-05 16:07:55,035][04594] Updated weights for policy 0, policy_version 6444 (0.0012) +[2024-07-05 16:07:58,533][04594] Updated weights for policy 0, policy_version 6454 (0.0011) +[2024-07-05 16:07:58,941][04005] Fps is (10 sec: 11878.7, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26439680. Throughput: 0: 2919.0. Samples: 1609384. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:07:58,942][04005] Avg episode reward: [(0, '36.501')] +[2024-07-05 16:08:02,051][04594] Updated weights for policy 0, policy_version 6464 (0.0012) +[2024-07-05 16:08:03,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26497024. Throughput: 0: 2914.8. Samples: 1617888. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:03,943][04005] Avg episode reward: [(0, '35.316')] +[2024-07-05 16:08:05,573][04594] Updated weights for policy 0, policy_version 6474 (0.0012) +[2024-07-05 16:08:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26554368. Throughput: 0: 2913.8. Samples: 1635326. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:08,942][04005] Avg episode reward: [(0, '37.040')] +[2024-07-05 16:08:09,080][04594] Updated weights for policy 0, policy_version 6484 (0.0011) +[2024-07-05 16:08:12,611][04594] Updated weights for policy 0, policy_version 6494 (0.0012) +[2024-07-05 16:08:13,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 26611712. Throughput: 0: 2913.6. Samples: 1652742. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:13,943][04005] Avg episode reward: [(0, '38.075')] +[2024-07-05 16:08:16,124][04594] Updated weights for policy 0, policy_version 6504 (0.0012) +[2024-07-05 16:08:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26673152. Throughput: 0: 2912.9. Samples: 1661690. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:18,942][04005] Avg episode reward: [(0, '40.264')] +[2024-07-05 16:08:19,639][04594] Updated weights for policy 0, policy_version 6514 (0.0012) +[2024-07-05 16:08:23,146][04594] Updated weights for policy 0, policy_version 6524 (0.0012) +[2024-07-05 16:08:23,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.7, 300 sec: 11677.1). Total num frames: 26730496. Throughput: 0: 2912.7. Samples: 1679148. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:23,942][04005] Avg episode reward: [(0, '40.484')] +[2024-07-05 16:08:26,662][04594] Updated weights for policy 0, policy_version 6534 (0.0012) +[2024-07-05 16:08:28,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 26787840. Throughput: 0: 2912.4. Samples: 1696614. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:28,943][04005] Avg episode reward: [(0, '40.487')] +[2024-07-05 16:08:30,153][04594] Updated weights for policy 0, policy_version 6544 (0.0012) +[2024-07-05 16:08:33,677][04594] Updated weights for policy 0, policy_version 6554 (0.0012) +[2024-07-05 16:08:33,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 26845184. Throughput: 0: 2917.8. Samples: 1705578. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:33,942][04005] Avg episode reward: [(0, '37.458')] +[2024-07-05 16:08:34,025][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000006555_26849280.pth... +[2024-07-05 16:08:34,097][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000005872_24051712.pth +[2024-07-05 16:08:37,193][04594] Updated weights for policy 0, policy_version 6564 (0.0011) +[2024-07-05 16:08:38,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 26902528. Throughput: 0: 2912.0. Samples: 1723002. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:38,943][04005] Avg episode reward: [(0, '35.862')] +[2024-07-05 16:08:40,722][04594] Updated weights for policy 0, policy_version 6574 (0.0012) +[2024-07-05 16:08:43,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11677.1). Total num frames: 26963968. Throughput: 0: 2911.1. Samples: 1740386. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:43,942][04005] Avg episode reward: [(0, '35.705')] +[2024-07-05 16:08:44,234][04594] Updated weights for policy 0, policy_version 6584 (0.0012) +[2024-07-05 16:08:47,754][04594] Updated weights for policy 0, policy_version 6594 (0.0011) +[2024-07-05 16:08:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27021312. Throughput: 0: 2911.9. Samples: 1748924. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:48,942][04005] Avg episode reward: [(0, '36.941')] +[2024-07-05 16:08:51,254][04594] Updated weights for policy 0, policy_version 6604 (0.0011) +[2024-07-05 16:08:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27078656. Throughput: 0: 2912.6. Samples: 1766394. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:53,942][04005] Avg episode reward: [(0, '37.346')] +[2024-07-05 16:08:54,764][04594] Updated weights for policy 0, policy_version 6614 (0.0011) +[2024-07-05 16:08:58,269][04594] Updated weights for policy 0, policy_version 6624 (0.0012) +[2024-07-05 16:08:58,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 27136000. Throughput: 0: 2918.8. Samples: 1784086. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:08:58,943][04005] Avg episode reward: [(0, '37.157')] +[2024-07-05 16:09:01,773][04594] Updated weights for policy 0, policy_version 6634 (0.0012) +[2024-07-05 16:09:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27197440. Throughput: 0: 2914.2. Samples: 1792830. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:03,942][04005] Avg episode reward: [(0, '38.656')] +[2024-07-05 16:09:05,279][04594] Updated weights for policy 0, policy_version 6644 (0.0011) +[2024-07-05 16:09:08,790][04594] Updated weights for policy 0, policy_version 6654 (0.0012) +[2024-07-05 16:09:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27254784. Throughput: 0: 2914.6. Samples: 1810304. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:08,943][04005] Avg episode reward: [(0, '37.093')] +[2024-07-05 16:09:12,311][04594] Updated weights for policy 0, policy_version 6664 (0.0012) +[2024-07-05 16:09:13,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27312128. Throughput: 0: 2914.0. Samples: 1827746. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:13,943][04005] Avg episode reward: [(0, '37.523')] +[2024-07-05 16:09:15,821][04594] Updated weights for policy 0, policy_version 6674 (0.0012) +[2024-07-05 16:09:18,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11663.2). Total num frames: 27369472. Throughput: 0: 2913.8. Samples: 1836700. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:18,942][04005] Avg episode reward: [(0, '38.064')] +[2024-07-05 16:09:19,335][04594] Updated weights for policy 0, policy_version 6684 (0.0012) +[2024-07-05 16:09:22,839][04594] Updated weights for policy 0, policy_version 6694 (0.0012) +[2024-07-05 16:09:23,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27430912. Throughput: 0: 2914.3. Samples: 1854146. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:23,942][04005] Avg episode reward: [(0, '40.388')] +[2024-07-05 16:09:26,360][04594] Updated weights for policy 0, policy_version 6704 (0.0012) +[2024-07-05 16:09:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27488256. Throughput: 0: 2917.4. Samples: 1871670. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:28,942][04005] Avg episode reward: [(0, '38.826')] +[2024-07-05 16:09:29,876][04594] Updated weights for policy 0, policy_version 6714 (0.0012) +[2024-07-05 16:09:33,380][04594] Updated weights for policy 0, policy_version 6724 (0.0012) +[2024-07-05 16:09:33,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27545600. Throughput: 0: 2919.4. Samples: 1880296. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:33,942][04005] Avg episode reward: [(0, '36.840')] +[2024-07-05 16:09:36,894][04594] Updated weights for policy 0, policy_version 6734 (0.0012) +[2024-07-05 16:09:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27602944. Throughput: 0: 2921.8. Samples: 1897876. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:38,942][04005] Avg episode reward: [(0, '37.263')] +[2024-07-05 16:09:40,420][04594] Updated weights for policy 0, policy_version 6744 (0.0011) +[2024-07-05 16:09:43,933][04594] Updated weights for policy 0, policy_version 6754 (0.0012) +[2024-07-05 16:09:43,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27664384. Throughput: 0: 2918.9. Samples: 1915438. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:09:43,942][04005] Avg episode reward: [(0, '36.834')] +[2024-07-05 16:09:47,452][04594] Updated weights for policy 0, policy_version 6764 (0.0012) +[2024-07-05 16:09:48,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27721728. Throughput: 0: 2914.8. Samples: 1923998. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:09:48,942][04005] Avg episode reward: [(0, '39.941')] +[2024-07-05 16:09:50,970][04594] Updated weights for policy 0, policy_version 6774 (0.0012) +[2024-07-05 16:09:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27779072. Throughput: 0: 2914.5. Samples: 1941456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:09:53,943][04005] Avg episode reward: [(0, '40.073')] +[2024-07-05 16:09:54,476][04594] Updated weights for policy 0, policy_version 6784 (0.0012) +[2024-07-05 16:09:57,980][04594] Updated weights for policy 0, policy_version 6794 (0.0011) +[2024-07-05 16:09:58,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27836416. Throughput: 0: 2915.2. Samples: 1958930. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:09:58,943][04005] Avg episode reward: [(0, '41.417')] +[2024-07-05 16:10:01,495][04594] Updated weights for policy 0, policy_version 6804 (0.0012) +[2024-07-05 16:10:03,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 27893760. Throughput: 0: 2915.1. Samples: 1967880. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:10:03,943][04005] Avg episode reward: [(0, '40.898')] +[2024-07-05 16:10:05,009][04594] Updated weights for policy 0, policy_version 6814 (0.0012) +[2024-07-05 16:10:08,516][04594] Updated weights for policy 0, policy_version 6824 (0.0012) +[2024-07-05 16:10:08,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 27955200. Throughput: 0: 2915.1. Samples: 1985324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:10:08,942][04005] Avg episode reward: [(0, '43.238')] +[2024-07-05 16:10:08,943][04581] Saving new best policy, reward=43.238! +[2024-07-05 16:10:12,050][04594] Updated weights for policy 0, policy_version 6834 (0.0012) +[2024-07-05 16:10:13,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28012544. Throughput: 0: 2912.7. Samples: 2002740. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:10:13,942][04005] Avg episode reward: [(0, '42.991')] +[2024-07-05 16:10:15,554][04594] Updated weights for policy 0, policy_version 6844 (0.0013) +[2024-07-05 16:10:18,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28069888. Throughput: 0: 2915.1. Samples: 2011474. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:18,942][04005] Avg episode reward: [(0, '42.267')] +[2024-07-05 16:10:19,068][04594] Updated weights for policy 0, policy_version 6854 (0.0012) +[2024-07-05 16:10:22,592][04594] Updated weights for policy 0, policy_version 6864 (0.0012) +[2024-07-05 16:10:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 28127232. Throughput: 0: 2913.5. Samples: 2028982. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:23,942][04005] Avg episode reward: [(0, '40.480')] +[2024-07-05 16:10:26,109][04594] Updated weights for policy 0, policy_version 6874 (0.0012) +[2024-07-05 16:10:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28188672. Throughput: 0: 2912.0. Samples: 2046480. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:28,942][04005] Avg episode reward: [(0, '41.765')] +[2024-07-05 16:10:29,633][04594] Updated weights for policy 0, policy_version 6884 (0.0012) +[2024-07-05 16:10:33,156][04594] Updated weights for policy 0, policy_version 6894 (0.0012) +[2024-07-05 16:10:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28246016. Throughput: 0: 2912.6. Samples: 2055064. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:33,942][04005] Avg episode reward: [(0, '41.493')] +[2024-07-05 16:10:34,204][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000006897_28250112.pth... +[2024-07-05 16:10:34,276][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000006213_25448448.pth +[2024-07-05 16:10:36,675][04594] Updated weights for policy 0, policy_version 6904 (0.0011) +[2024-07-05 16:10:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28303360. Throughput: 0: 2911.2. Samples: 2072462. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:38,942][04005] Avg episode reward: [(0, '42.011')] +[2024-07-05 16:10:40,215][04594] Updated weights for policy 0, policy_version 6914 (0.0012) +[2024-07-05 16:10:43,722][04594] Updated weights for policy 0, policy_version 6924 (0.0013) +[2024-07-05 16:10:43,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 28360704. Throughput: 0: 2910.8. Samples: 2089916. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:43,942][04005] Avg episode reward: [(0, '43.406')] +[2024-07-05 16:10:44,068][04581] Saving new best policy, reward=43.406! +[2024-07-05 16:10:47,252][04594] Updated weights for policy 0, policy_version 6934 (0.0012) +[2024-07-05 16:10:48,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.4, 300 sec: 11649.3). Total num frames: 28418048. Throughput: 0: 2909.6. Samples: 2098810. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:48,942][04005] Avg episode reward: [(0, '44.616')] +[2024-07-05 16:10:48,999][04581] Saving new best policy, reward=44.616! +[2024-07-05 16:10:50,765][04594] Updated weights for policy 0, policy_version 6944 (0.0011) +[2024-07-05 16:10:53,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28479488. Throughput: 0: 2909.4. Samples: 2116248. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:53,942][04005] Avg episode reward: [(0, '43.147')] +[2024-07-05 16:10:54,287][04594] Updated weights for policy 0, policy_version 6954 (0.0012) +[2024-07-05 16:10:57,804][04594] Updated weights for policy 0, policy_version 6964 (0.0012) +[2024-07-05 16:10:58,942][04005] Fps is (10 sec: 11878.1, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28536832. Throughput: 0: 2909.0. Samples: 2133646. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:10:58,942][04005] Avg episode reward: [(0, '42.230')] +[2024-07-05 16:11:01,311][04594] Updated weights for policy 0, policy_version 6974 (0.0011) +[2024-07-05 16:11:03,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28594176. Throughput: 0: 2904.9. Samples: 2142194. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:03,943][04005] Avg episode reward: [(0, '40.920')] +[2024-07-05 16:11:04,825][04594] Updated weights for policy 0, policy_version 6984 (0.0011) +[2024-07-05 16:11:08,328][04594] Updated weights for policy 0, policy_version 6994 (0.0012) +[2024-07-05 16:11:08,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 28651520. Throughput: 0: 2906.9. Samples: 2159792. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:08,942][04005] Avg episode reward: [(0, '42.598')] +[2024-07-05 16:11:11,851][04594] Updated weights for policy 0, policy_version 7004 (0.0012) +[2024-07-05 16:11:13,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 28708864. Throughput: 0: 2907.9. Samples: 2177336. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:13,943][04005] Avg episode reward: [(0, '42.848')] +[2024-07-05 16:11:15,359][04594] Updated weights for policy 0, policy_version 7014 (0.0012) +[2024-07-05 16:11:18,864][04594] Updated weights for policy 0, policy_version 7024 (0.0012) +[2024-07-05 16:11:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28770304. Throughput: 0: 2910.0. Samples: 2186014. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:18,942][04005] Avg episode reward: [(0, '42.863')] +[2024-07-05 16:11:22,392][04594] Updated weights for policy 0, policy_version 7034 (0.0012) +[2024-07-05 16:11:23,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 28827648. Throughput: 0: 2910.9. Samples: 2203454. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:23,942][04005] Avg episode reward: [(0, '40.716')] +[2024-07-05 16:11:25,908][04594] Updated weights for policy 0, policy_version 7044 (0.0011) +[2024-07-05 16:11:28,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 28884992. Throughput: 0: 2911.4. Samples: 2220930. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:11:28,942][04005] Avg episode reward: [(0, '41.833')] +[2024-07-05 16:11:29,411][04594] Updated weights for policy 0, policy_version 7054 (0.0012) +[2024-07-05 16:11:32,916][04594] Updated weights for policy 0, policy_version 7064 (0.0012) +[2024-07-05 16:11:33,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 28942336. Throughput: 0: 2912.5. Samples: 2229872. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:11:33,942][04005] Avg episode reward: [(0, '41.820')] +[2024-07-05 16:11:36,435][04594] Updated weights for policy 0, policy_version 7074 (0.0012) +[2024-07-05 16:11:38,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29003776. Throughput: 0: 2912.7. Samples: 2247320. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:38,942][04005] Avg episode reward: [(0, '42.209')] +[2024-07-05 16:11:39,960][04594] Updated weights for policy 0, policy_version 7084 (0.0012) +[2024-07-05 16:11:43,469][04594] Updated weights for policy 0, policy_version 7094 (0.0012) +[2024-07-05 16:11:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29061120. Throughput: 0: 2913.0. Samples: 2264730. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:11:43,942][04005] Avg episode reward: [(0, '43.482')] +[2024-07-05 16:11:46,979][04594] Updated weights for policy 0, policy_version 7104 (0.0012) +[2024-07-05 16:11:48,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 29118464. Throughput: 0: 2915.5. Samples: 2273392. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:11:48,943][04005] Avg episode reward: [(0, '42.050')] +[2024-07-05 16:11:50,483][04594] Updated weights for policy 0, policy_version 7114 (0.0011) +[2024-07-05 16:11:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 29175808. Throughput: 0: 2917.4. Samples: 2291074. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:11:53,942][04005] Avg episode reward: [(0, '40.702')] +[2024-07-05 16:11:53,991][04594] Updated weights for policy 0, policy_version 7124 (0.0012) +[2024-07-05 16:11:57,495][04594] Updated weights for policy 0, policy_version 7134 (0.0011) +[2024-07-05 16:11:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29237248. Throughput: 0: 2917.8. Samples: 2308636. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:11:58,942][04005] Avg episode reward: [(0, '40.698')] +[2024-07-05 16:12:01,014][04594] Updated weights for policy 0, policy_version 7144 (0.0012) +[2024-07-05 16:12:03,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29294592. Throughput: 0: 2914.3. Samples: 2317158. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:03,942][04005] Avg episode reward: [(0, '41.819')] +[2024-07-05 16:12:04,529][04594] Updated weights for policy 0, policy_version 7154 (0.0012) +[2024-07-05 16:12:08,022][04594] Updated weights for policy 0, policy_version 7164 (0.0012) +[2024-07-05 16:12:08,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29351936. Throughput: 0: 2915.1. Samples: 2334636. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:08,943][04005] Avg episode reward: [(0, '42.453')] +[2024-07-05 16:12:11,548][04594] Updated weights for policy 0, policy_version 7174 (0.0012) +[2024-07-05 16:12:13,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 29409280. Throughput: 0: 2914.4. Samples: 2352076. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:13,942][04005] Avg episode reward: [(0, '43.004')] +[2024-07-05 16:12:15,059][04594] Updated weights for policy 0, policy_version 7184 (0.0012) +[2024-07-05 16:12:18,560][04594] Updated weights for policy 0, policy_version 7194 (0.0012) +[2024-07-05 16:12:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29470720. Throughput: 0: 2914.5. Samples: 2361026. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:18,942][04005] Avg episode reward: [(0, '43.932')] +[2024-07-05 16:12:22,083][04594] Updated weights for policy 0, policy_version 7204 (0.0012) +[2024-07-05 16:12:23,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29528064. Throughput: 0: 2914.7. Samples: 2378480. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:23,943][04005] Avg episode reward: [(0, '43.726')] +[2024-07-05 16:12:25,595][04594] Updated weights for policy 0, policy_version 7214 (0.0012) +[2024-07-05 16:12:28,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29585408. Throughput: 0: 2914.5. Samples: 2395884. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:28,942][04005] Avg episode reward: [(0, '43.135')] +[2024-07-05 16:12:29,110][04594] Updated weights for policy 0, policy_version 7224 (0.0011) +[2024-07-05 16:12:32,616][04594] Updated weights for policy 0, policy_version 7234 (0.0012) +[2024-07-05 16:12:33,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 29642752. Throughput: 0: 2921.3. Samples: 2404850. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:33,943][04005] Avg episode reward: [(0, '42.912')] +[2024-07-05 16:12:34,023][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000007238_29646848.pth... +[2024-07-05 16:12:34,095][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000006555_26849280.pth +[2024-07-05 16:12:36,146][04594] Updated weights for policy 0, policy_version 7244 (0.0012) +[2024-07-05 16:12:38,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 29700096. Throughput: 0: 2915.4. Samples: 2422266. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:38,943][04005] Avg episode reward: [(0, '43.262')] +[2024-07-05 16:12:39,674][04594] Updated weights for policy 0, policy_version 7254 (0.0012) +[2024-07-05 16:12:43,179][04594] Updated weights for policy 0, policy_version 7264 (0.0011) +[2024-07-05 16:12:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29761536. Throughput: 0: 2912.5. Samples: 2439698. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:43,942][04005] Avg episode reward: [(0, '42.142')] +[2024-07-05 16:12:46,701][04594] Updated weights for policy 0, policy_version 7274 (0.0012) +[2024-07-05 16:12:48,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29818880. Throughput: 0: 2911.9. Samples: 2448194. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:48,942][04005] Avg episode reward: [(0, '41.035')] +[2024-07-05 16:12:50,196][04594] Updated weights for policy 0, policy_version 7284 (0.0012) +[2024-07-05 16:12:53,715][04594] Updated weights for policy 0, policy_version 7294 (0.0012) +[2024-07-05 16:12:53,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 29876224. Throughput: 0: 2911.7. Samples: 2465664. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:53,943][04005] Avg episode reward: [(0, '42.210')] +[2024-07-05 16:12:57,227][04594] Updated weights for policy 0, policy_version 7304 (0.0012) +[2024-07-05 16:12:58,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 29933568. Throughput: 0: 2914.5. Samples: 2483228. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:12:58,942][04005] Avg episode reward: [(0, '42.615')] +[2024-07-05 16:13:00,741][04594] Updated weights for policy 0, policy_version 7314 (0.0011) +[2024-07-05 16:13:03,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 29995008. Throughput: 0: 2912.2. Samples: 2492076. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:03,942][04005] Avg episode reward: [(0, '42.768')] +[2024-07-05 16:13:04,254][04594] Updated weights for policy 0, policy_version 7324 (0.0012) +[2024-07-05 16:13:07,762][04594] Updated weights for policy 0, policy_version 7334 (0.0011) +[2024-07-05 16:13:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 30052352. Throughput: 0: 2912.2. Samples: 2509528. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:08,942][04005] Avg episode reward: [(0, '44.627')] +[2024-07-05 16:13:09,159][04581] Saving new best policy, reward=44.627! +[2024-07-05 16:13:11,295][04594] Updated weights for policy 0, policy_version 7344 (0.0012) +[2024-07-05 16:13:13,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 30109696. Throughput: 0: 2912.7. Samples: 2526958. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:13,943][04005] Avg episode reward: [(0, '47.066')] +[2024-07-05 16:13:14,103][04581] Saving new best policy, reward=47.066! +[2024-07-05 16:13:14,818][04594] Updated weights for policy 0, policy_version 7354 (0.0011) +[2024-07-05 16:13:18,322][04594] Updated weights for policy 0, policy_version 7364 (0.0012) +[2024-07-05 16:13:18,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 30167040. Throughput: 0: 2911.9. Samples: 2535886. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:18,942][04005] Avg episode reward: [(0, '47.367')] +[2024-07-05 16:13:19,029][04581] Saving new best policy, reward=47.367! +[2024-07-05 16:13:21,840][04594] Updated weights for policy 0, policy_version 7374 (0.0011) +[2024-07-05 16:13:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 30224384. Throughput: 0: 2912.9. Samples: 2553348. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:23,942][04005] Avg episode reward: [(0, '45.956')] +[2024-07-05 16:13:25,359][04594] Updated weights for policy 0, policy_version 7384 (0.0012) +[2024-07-05 16:13:28,866][04594] Updated weights for policy 0, policy_version 7394 (0.0012) +[2024-07-05 16:13:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 30285824. Throughput: 0: 2912.8. Samples: 2570776. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:28,943][04005] Avg episode reward: [(0, '43.665')] +[2024-07-05 16:13:32,385][04594] Updated weights for policy 0, policy_version 7404 (0.0012) +[2024-07-05 16:13:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 30343168. Throughput: 0: 2913.6. Samples: 2579304. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:33,943][04005] Avg episode reward: [(0, '45.070')] +[2024-07-05 16:13:35,890][04594] Updated weights for policy 0, policy_version 7414 (0.0011) +[2024-07-05 16:13:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 30400512. Throughput: 0: 2915.1. Samples: 2596842. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:38,942][04005] Avg episode reward: [(0, '45.274')] +[2024-07-05 16:13:39,458][04594] Updated weights for policy 0, policy_version 7424 (0.0012) +[2024-07-05 16:13:42,919][04594] Updated weights for policy 0, policy_version 7434 (0.0011) +[2024-07-05 16:13:43,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 30457856. Throughput: 0: 2915.1. Samples: 2614410. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:43,942][04005] Avg episode reward: [(0, '45.026')] +[2024-07-05 16:13:46,440][04594] Updated weights for policy 0, policy_version 7444 (0.0012) +[2024-07-05 16:13:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 30519296. Throughput: 0: 2914.0. Samples: 2623204. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:48,942][04005] Avg episode reward: [(0, '43.992')] +[2024-07-05 16:13:49,951][04594] Updated weights for policy 0, policy_version 7454 (0.0012) +[2024-07-05 16:13:53,488][04594] Updated weights for policy 0, policy_version 7464 (0.0012) +[2024-07-05 16:13:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 30576640. Throughput: 0: 2913.1. Samples: 2640618. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:53,942][04005] Avg episode reward: [(0, '45.407')] +[2024-07-05 16:13:56,999][04594] Updated weights for policy 0, policy_version 7474 (0.0012) +[2024-07-05 16:13:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 30633984. Throughput: 0: 2913.3. Samples: 2658058. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:13:58,942][04005] Avg episode reward: [(0, '46.773')] +[2024-07-05 16:14:00,516][04594] Updated weights for policy 0, policy_version 7484 (0.0012) +[2024-07-05 16:14:03,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 30691328. Throughput: 0: 2913.4. Samples: 2666990. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:03,942][04005] Avg episode reward: [(0, '46.169')] +[2024-07-05 16:14:04,022][04594] Updated weights for policy 0, policy_version 7494 (0.0011) +[2024-07-05 16:14:07,543][04594] Updated weights for policy 0, policy_version 7504 (0.0012) +[2024-07-05 16:14:08,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 30748672. Throughput: 0: 2912.9. Samples: 2684430. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:08,943][04005] Avg episode reward: [(0, '46.517')] +[2024-07-05 16:14:11,084][04594] Updated weights for policy 0, policy_version 7514 (0.0012) +[2024-07-05 16:14:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 30810112. Throughput: 0: 2913.0. Samples: 2701862. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:13,942][04005] Avg episode reward: [(0, '44.600')] +[2024-07-05 16:14:14,591][04594] Updated weights for policy 0, policy_version 7524 (0.0012) +[2024-07-05 16:14:18,116][04594] Updated weights for policy 0, policy_version 7534 (0.0011) +[2024-07-05 16:14:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 30867456. Throughput: 0: 2912.9. Samples: 2710386. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:18,942][04005] Avg episode reward: [(0, '44.582')] +[2024-07-05 16:14:21,628][04594] Updated weights for policy 0, policy_version 7544 (0.0012) +[2024-07-05 16:14:23,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 30924800. Throughput: 0: 2910.5. Samples: 2727814. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:23,942][04005] Avg episode reward: [(0, '44.007')] +[2024-07-05 16:14:25,141][04594] Updated weights for policy 0, policy_version 7554 (0.0011) +[2024-07-05 16:14:28,651][04594] Updated weights for policy 0, policy_version 7564 (0.0012) +[2024-07-05 16:14:28,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 30982144. Throughput: 0: 2908.7. Samples: 2745302. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:28,942][04005] Avg episode reward: [(0, '45.314')] +[2024-07-05 16:14:32,160][04594] Updated weights for policy 0, policy_version 7574 (0.0012) +[2024-07-05 16:14:33,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 31043584. Throughput: 0: 2912.2. Samples: 2754254. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:14:33,943][04005] Avg episode reward: [(0, '43.264')] +[2024-07-05 16:14:33,946][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000007579_31043584.pth... +[2024-07-05 16:14:34,019][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000006897_28250112.pth +[2024-07-05 16:14:35,690][04594] Updated weights for policy 0, policy_version 7584 (0.0011) +[2024-07-05 16:14:38,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31100928. Throughput: 0: 2913.1. Samples: 2771708. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:14:38,942][04005] Avg episode reward: [(0, '43.272')] +[2024-07-05 16:14:39,203][04594] Updated weights for policy 0, policy_version 7594 (0.0012) +[2024-07-05 16:14:42,729][04594] Updated weights for policy 0, policy_version 7604 (0.0012) +[2024-07-05 16:14:43,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31158272. Throughput: 0: 2913.2. Samples: 2789152. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:14:43,942][04005] Avg episode reward: [(0, '41.753')] +[2024-07-05 16:14:46,242][04594] Updated weights for policy 0, policy_version 7614 (0.0012) +[2024-07-05 16:14:48,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 31215616. Throughput: 0: 2910.0. Samples: 2797942. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:14:48,943][04005] Avg episode reward: [(0, '43.471')] +[2024-07-05 16:14:49,773][04594] Updated weights for policy 0, policy_version 7624 (0.0012) +[2024-07-05 16:14:53,282][04594] Updated weights for policy 0, policy_version 7634 (0.0011) +[2024-07-05 16:14:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 31272960. Throughput: 0: 2912.0. Samples: 2815472. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:14:53,943][04005] Avg episode reward: [(0, '43.800')] +[2024-07-05 16:14:56,802][04594] Updated weights for policy 0, policy_version 7644 (0.0012) +[2024-07-05 16:14:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 31334400. Throughput: 0: 2913.6. Samples: 2832974. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:14:58,942][04005] Avg episode reward: [(0, '45.194')] +[2024-07-05 16:15:00,331][04594] Updated weights for policy 0, policy_version 7654 (0.0012) +[2024-07-05 16:15:03,835][04594] Updated weights for policy 0, policy_version 7664 (0.0012) +[2024-07-05 16:15:03,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31391744. Throughput: 0: 2912.8. Samples: 2841464. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:03,943][04005] Avg episode reward: [(0, '44.621')] +[2024-07-05 16:15:07,355][04594] Updated weights for policy 0, policy_version 7674 (0.0012) +[2024-07-05 16:15:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31449088. Throughput: 0: 2912.8. Samples: 2858890. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:08,942][04005] Avg episode reward: [(0, '45.975')] +[2024-07-05 16:15:10,885][04594] Updated weights for policy 0, policy_version 7684 (0.0011) +[2024-07-05 16:15:13,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 31506432. Throughput: 0: 2912.5. Samples: 2876364. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:13,942][04005] Avg episode reward: [(0, '46.466')] +[2024-07-05 16:15:14,394][04594] Updated weights for policy 0, policy_version 7694 (0.0012) +[2024-07-05 16:15:17,903][04594] Updated weights for policy 0, policy_version 7704 (0.0011) +[2024-07-05 16:15:18,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 31563776. Throughput: 0: 2911.9. Samples: 2885288. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:18,942][04005] Avg episode reward: [(0, '48.412')] +[2024-07-05 16:15:18,954][04581] Saving new best policy, reward=48.412! +[2024-07-05 16:15:21,417][04594] Updated weights for policy 0, policy_version 7714 (0.0012) +[2024-07-05 16:15:23,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31625216. Throughput: 0: 2912.5. Samples: 2902772. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:23,942][04005] Avg episode reward: [(0, '46.420')] +[2024-07-05 16:15:24,920][04594] Updated weights for policy 0, policy_version 7724 (0.0011) +[2024-07-05 16:15:28,437][04594] Updated weights for policy 0, policy_version 7734 (0.0011) +[2024-07-05 16:15:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31682560. Throughput: 0: 2912.5. Samples: 2920216. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:28,942][04005] Avg episode reward: [(0, '44.816')] +[2024-07-05 16:15:31,941][04594] Updated weights for policy 0, policy_version 7744 (0.0011) +[2024-07-05 16:15:33,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 31739904. Throughput: 0: 2914.2. Samples: 2929082. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:33,942][04005] Avg episode reward: [(0, '44.984')] +[2024-07-05 16:15:35,456][04594] Updated weights for policy 0, policy_version 7754 (0.0011) +[2024-07-05 16:15:38,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11649.3). Total num frames: 31797248. Throughput: 0: 2914.4. Samples: 2946620. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:38,942][04005] Avg episode reward: [(0, '45.549')] +[2024-07-05 16:15:38,964][04594] Updated weights for policy 0, policy_version 7764 (0.0012) +[2024-07-05 16:15:42,491][04594] Updated weights for policy 0, policy_version 7774 (0.0012) +[2024-07-05 16:15:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 31858688. Throughput: 0: 2913.2. Samples: 2964068. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:43,942][04005] Avg episode reward: [(0, '47.283')] +[2024-07-05 16:15:45,996][04594] Updated weights for policy 0, policy_version 7784 (0.0011) +[2024-07-05 16:15:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31916032. Throughput: 0: 2914.6. Samples: 2972622. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:48,942][04005] Avg episode reward: [(0, '47.540')] +[2024-07-05 16:15:49,466][04594] Updated weights for policy 0, policy_version 7794 (0.0011) +[2024-07-05 16:15:52,944][04594] Updated weights for policy 0, policy_version 7804 (0.0011) +[2024-07-05 16:15:53,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11673.6, 300 sec: 11649.3). Total num frames: 31973376. Throughput: 0: 2925.1. Samples: 2990518. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:53,942][04005] Avg episode reward: [(0, '45.508')] +[2024-07-05 16:15:56,402][04594] Updated weights for policy 0, policy_version 7814 (0.0011) +[2024-07-05 16:15:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 32034816. Throughput: 0: 2929.6. Samples: 3008194. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:15:58,942][04005] Avg episode reward: [(0, '46.395')] +[2024-07-05 16:15:59,878][04594] Updated weights for policy 0, policy_version 7824 (0.0011) +[2024-07-05 16:16:03,349][04594] Updated weights for policy 0, policy_version 7834 (0.0012) +[2024-07-05 16:16:03,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11673.6, 300 sec: 11663.2). Total num frames: 32092160. Throughput: 0: 2927.2. Samples: 3017012. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:03,942][04005] Avg episode reward: [(0, '45.910')] +[2024-07-05 16:16:06,821][04594] Updated weights for policy 0, policy_version 7844 (0.0012) +[2024-07-05 16:16:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11677.1). Total num frames: 32153600. Throughput: 0: 2933.6. Samples: 3034786. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:08,943][04005] Avg episode reward: [(0, '47.997')] +[2024-07-05 16:16:10,299][04594] Updated weights for policy 0, policy_version 7854 (0.0011) +[2024-07-05 16:16:13,779][04594] Updated weights for policy 0, policy_version 7864 (0.0011) +[2024-07-05 16:16:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11663.2). Total num frames: 32210944. Throughput: 0: 2936.6. Samples: 3052362. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:13,943][04005] Avg episode reward: [(0, '47.741')] +[2024-07-05 16:16:17,264][04594] Updated weights for policy 0, policy_version 7874 (0.0011) +[2024-07-05 16:16:18,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11663.2). Total num frames: 32268288. Throughput: 0: 2937.9. Samples: 3061288. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:18,942][04005] Avg episode reward: [(0, '49.097')] +[2024-07-05 16:16:18,994][04581] Saving new best policy, reward=49.097! +[2024-07-05 16:16:20,741][04594] Updated weights for policy 0, policy_version 7884 (0.0011) +[2024-07-05 16:16:23,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11677.1). Total num frames: 32329728. Throughput: 0: 2937.8. Samples: 3078822. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:16:23,942][04005] Avg episode reward: [(0, '49.886')] +[2024-07-05 16:16:23,945][04581] Saving new best policy, reward=49.886! +[2024-07-05 16:16:24,293][04594] Updated weights for policy 0, policy_version 7894 (0.0011) +[2024-07-05 16:16:27,714][04594] Updated weights for policy 0, policy_version 7904 (0.0011) +[2024-07-05 16:16:28,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11677.1). Total num frames: 32387072. Throughput: 0: 2940.7. Samples: 3096398. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:16:28,943][04005] Avg episode reward: [(0, '48.980')] +[2024-07-05 16:16:31,184][04594] Updated weights for policy 0, policy_version 7914 (0.0011) +[2024-07-05 16:16:33,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.8, 300 sec: 11663.2). Total num frames: 32444416. Throughput: 0: 2950.0. Samples: 3105372. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:33,943][04005] Avg episode reward: [(0, '48.051')] +[2024-07-05 16:16:33,969][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000007922_32448512.pth... +[2024-07-05 16:16:34,040][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000007238_29646848.pth +[2024-07-05 16:16:34,670][04594] Updated weights for policy 0, policy_version 7924 (0.0011) +[2024-07-05 16:16:38,144][04594] Updated weights for policy 0, policy_version 7934 (0.0011) +[2024-07-05 16:16:38,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11677.1). Total num frames: 32505856. Throughput: 0: 2943.1. Samples: 3122960. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:38,943][04005] Avg episode reward: [(0, '47.458')] +[2024-07-05 16:16:41,628][04594] Updated weights for policy 0, policy_version 7944 (0.0011) +[2024-07-05 16:16:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11677.1). Total num frames: 32563200. Throughput: 0: 2939.6. Samples: 3140474. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:43,942][04005] Avg episode reward: [(0, '46.945')] +[2024-07-05 16:16:45,105][04594] Updated weights for policy 0, policy_version 7954 (0.0011) +[2024-07-05 16:16:48,588][04594] Updated weights for policy 0, policy_version 7964 (0.0013) +[2024-07-05 16:16:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11691.0). Total num frames: 32624640. Throughput: 0: 2942.9. Samples: 3149442. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:48,942][04005] Avg episode reward: [(0, '46.584')] +[2024-07-05 16:16:52,062][04594] Updated weights for policy 0, policy_version 7974 (0.0011) +[2024-07-05 16:16:53,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11677.1). Total num frames: 32681984. Throughput: 0: 2938.8. Samples: 3167030. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:53,942][04005] Avg episode reward: [(0, '47.806')] +[2024-07-05 16:16:55,543][04594] Updated weights for policy 0, policy_version 7984 (0.0011) +[2024-07-05 16:16:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11677.1). Total num frames: 32739328. Throughput: 0: 2938.3. Samples: 3184586. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:16:58,942][04005] Avg episode reward: [(0, '47.462')] +[2024-07-05 16:16:59,016][04594] Updated weights for policy 0, policy_version 7994 (0.0011) +[2024-07-05 16:17:02,487][04594] Updated weights for policy 0, policy_version 8004 (0.0011) +[2024-07-05 16:17:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11691.0). Total num frames: 32800768. Throughput: 0: 2940.5. Samples: 3193612. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:17:03,942][04005] Avg episode reward: [(0, '47.697')] +[2024-07-05 16:17:05,959][04594] Updated weights for policy 0, policy_version 8014 (0.0011) +[2024-07-05 16:17:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11691.0). Total num frames: 32858112. Throughput: 0: 2941.2. Samples: 3211174. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:17:08,942][04005] Avg episode reward: [(0, '45.504')] +[2024-07-05 16:17:09,462][04594] Updated weights for policy 0, policy_version 8024 (0.0011) +[2024-07-05 16:17:12,929][04594] Updated weights for policy 0, policy_version 8034 (0.0011) +[2024-07-05 16:17:13,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11677.1). Total num frames: 32915456. Throughput: 0: 2943.7. Samples: 3228864. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:13,942][04005] Avg episode reward: [(0, '46.172')] +[2024-07-05 16:17:16,407][04594] Updated weights for policy 0, policy_version 8044 (0.0011) +[2024-07-05 16:17:18,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11691.0). Total num frames: 32976896. Throughput: 0: 2940.4. Samples: 3237688. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:18,942][04005] Avg episode reward: [(0, '46.346')] +[2024-07-05 16:17:19,900][04594] Updated weights for policy 0, policy_version 8054 (0.0011) +[2024-07-05 16:17:23,366][04594] Updated weights for policy 0, policy_version 8064 (0.0011) +[2024-07-05 16:17:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11691.0). Total num frames: 33034240. Throughput: 0: 2938.7. Samples: 3255200. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:23,943][04005] Avg episode reward: [(0, '46.003')] +[2024-07-05 16:17:26,845][04594] Updated weights for policy 0, policy_version 8074 (0.0011) +[2024-07-05 16:17:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11704.8). Total num frames: 33095680. Throughput: 0: 2948.3. Samples: 3273148. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:28,942][04005] Avg episode reward: [(0, '45.687')] +[2024-07-05 16:17:30,320][04594] Updated weights for policy 0, policy_version 8084 (0.0011) +[2024-07-05 16:17:33,793][04594] Updated weights for policy 0, policy_version 8094 (0.0011) +[2024-07-05 16:17:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11704.8). Total num frames: 33153024. Throughput: 0: 2940.6. Samples: 3281768. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:33,942][04005] Avg episode reward: [(0, '44.812')] +[2024-07-05 16:17:37,276][04594] Updated weights for policy 0, policy_version 8104 (0.0011) +[2024-07-05 16:17:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11691.0). Total num frames: 33210368. Throughput: 0: 2943.9. Samples: 3299504. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:38,942][04005] Avg episode reward: [(0, '44.012')] +[2024-07-05 16:17:40,757][04594] Updated weights for policy 0, policy_version 8114 (0.0011) +[2024-07-05 16:17:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11810.1, 300 sec: 11704.8). Total num frames: 33271808. Throughput: 0: 2949.3. Samples: 3317306. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:43,942][04005] Avg episode reward: [(0, '43.446')] +[2024-07-05 16:17:44,250][04594] Updated weights for policy 0, policy_version 8124 (0.0011) +[2024-07-05 16:17:47,729][04594] Updated weights for policy 0, policy_version 8134 (0.0011) +[2024-07-05 16:17:48,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11704.8). Total num frames: 33329152. Throughput: 0: 2939.0. Samples: 3325866. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:17:48,943][04005] Avg episode reward: [(0, '45.237')] +[2024-07-05 16:17:51,206][04594] Updated weights for policy 0, policy_version 8144 (0.0011) +[2024-07-05 16:17:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11704.8). Total num frames: 33386496. Throughput: 0: 2945.4. Samples: 3343718. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:17:53,942][04005] Avg episode reward: [(0, '48.305')] +[2024-07-05 16:17:54,682][04594] Updated weights for policy 0, policy_version 8154 (0.0012) +[2024-07-05 16:17:58,156][04594] Updated weights for policy 0, policy_version 8164 (0.0012) +[2024-07-05 16:17:58,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11704.8). Total num frames: 33447936. Throughput: 0: 2945.1. Samples: 3361396. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:17:58,943][04005] Avg episode reward: [(0, '49.196')] +[2024-07-05 16:18:01,624][04594] Updated weights for policy 0, policy_version 8174 (0.0011) +[2024-07-05 16:18:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11704.8). Total num frames: 33505280. Throughput: 0: 2942.8. Samples: 3370114. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:03,944][04005] Avg episode reward: [(0, '46.616')] +[2024-07-05 16:18:05,101][04594] Updated weights for policy 0, policy_version 8184 (0.0012) +[2024-07-05 16:18:08,589][04594] Updated weights for policy 0, policy_version 8194 (0.0011) +[2024-07-05 16:18:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11718.7). Total num frames: 33566720. Throughput: 0: 2949.7. Samples: 3387938. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:08,942][04005] Avg episode reward: [(0, '45.445')] +[2024-07-05 16:18:12,072][04594] Updated weights for policy 0, policy_version 8204 (0.0011) +[2024-07-05 16:18:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11718.7). Total num frames: 33624064. Throughput: 0: 2940.7. Samples: 3405480. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:13,943][04005] Avg episode reward: [(0, '45.422')] +[2024-07-05 16:18:15,550][04594] Updated weights for policy 0, policy_version 8214 (0.0011) +[2024-07-05 16:18:18,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 33681408. Throughput: 0: 2945.5. Samples: 3414316. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:18,942][04005] Avg episode reward: [(0, '45.350')] +[2024-07-05 16:18:19,027][04594] Updated weights for policy 0, policy_version 8224 (0.0011) +[2024-07-05 16:18:22,492][04594] Updated weights for policy 0, policy_version 8234 (0.0011) +[2024-07-05 16:18:23,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11718.7). Total num frames: 33742848. Throughput: 0: 2945.1. Samples: 3432032. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:23,942][04005] Avg episode reward: [(0, '46.699')] +[2024-07-05 16:18:25,959][04594] Updated weights for policy 0, policy_version 8244 (0.0011) +[2024-07-05 16:18:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 33800192. Throughput: 0: 2940.4. Samples: 3449624. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:28,942][04005] Avg episode reward: [(0, '45.970')] +[2024-07-05 16:18:29,437][04594] Updated weights for policy 0, policy_version 8254 (0.0011) +[2024-07-05 16:18:32,915][04594] Updated weights for policy 0, policy_version 8264 (0.0011) +[2024-07-05 16:18:33,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 33857536. Throughput: 0: 2949.9. Samples: 3458612. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:33,944][04005] Avg episode reward: [(0, '47.758')] +[2024-07-05 16:18:33,956][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000008267_33861632.pth... +[2024-07-05 16:18:34,028][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000007579_31043584.pth +[2024-07-05 16:18:36,400][04594] Updated weights for policy 0, policy_version 8274 (0.0011) +[2024-07-05 16:18:38,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 33918976. Throughput: 0: 2943.1. Samples: 3476158. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:38,942][04005] Avg episode reward: [(0, '46.848')] +[2024-07-05 16:18:39,901][04594] Updated weights for policy 0, policy_version 8284 (0.0011) +[2024-07-05 16:18:43,374][04594] Updated weights for policy 0, policy_version 8294 (0.0011) +[2024-07-05 16:18:43,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 33976320. Throughput: 0: 2939.1. Samples: 3493656. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:43,942][04005] Avg episode reward: [(0, '47.524')] +[2024-07-05 16:18:46,852][04594] Updated weights for policy 0, policy_version 8304 (0.0012) +[2024-07-05 16:18:48,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11718.7). Total num frames: 34033664. Throughput: 0: 2945.5. Samples: 3502662. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:48,942][04005] Avg episode reward: [(0, '46.822')] +[2024-07-05 16:18:50,348][04594] Updated weights for policy 0, policy_version 8314 (0.0011) +[2024-07-05 16:18:53,822][04594] Updated weights for policy 0, policy_version 8324 (0.0011) +[2024-07-05 16:18:53,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11732.6). Total num frames: 34095104. Throughput: 0: 2939.4. Samples: 3520210. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:53,942][04005] Avg episode reward: [(0, '44.826')] +[2024-07-05 16:18:57,296][04594] Updated weights for policy 0, policy_version 8334 (0.0011) +[2024-07-05 16:18:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 34152448. Throughput: 0: 2939.1. Samples: 3537738. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:18:58,942][04005] Avg episode reward: [(0, '44.142')] +[2024-07-05 16:19:00,773][04594] Updated weights for policy 0, policy_version 8344 (0.0011) +[2024-07-05 16:19:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 34213888. Throughput: 0: 2943.2. Samples: 3546760. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:03,942][04005] Avg episode reward: [(0, '44.466')] +[2024-07-05 16:19:04,259][04594] Updated weights for policy 0, policy_version 8354 (0.0011) +[2024-07-05 16:19:07,732][04594] Updated weights for policy 0, policy_version 8364 (0.0012) +[2024-07-05 16:19:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 34271232. Throughput: 0: 2939.0. Samples: 3564286. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:08,942][04005] Avg episode reward: [(0, '47.074')] +[2024-07-05 16:19:11,230][04594] Updated weights for policy 0, policy_version 8374 (0.0011) +[2024-07-05 16:19:13,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 34328576. Throughput: 0: 2937.6. Samples: 3581818. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:19:13,942][04005] Avg episode reward: [(0, '46.700')] +[2024-07-05 16:19:14,698][04594] Updated weights for policy 0, policy_version 8384 (0.0011) +[2024-07-05 16:19:18,169][04594] Updated weights for policy 0, policy_version 8394 (0.0011) +[2024-07-05 16:19:18,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 34390016. Throughput: 0: 2937.6. Samples: 3590804. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:19:18,944][04005] Avg episode reward: [(0, '46.757')] +[2024-07-05 16:19:21,644][04594] Updated weights for policy 0, policy_version 8404 (0.0012) +[2024-07-05 16:19:23,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 34447360. Throughput: 0: 2937.6. Samples: 3608350. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:19:23,942][04005] Avg episode reward: [(0, '46.262')] +[2024-07-05 16:19:25,123][04594] Updated weights for policy 0, policy_version 8414 (0.0011) +[2024-07-05 16:19:28,625][04594] Updated weights for policy 0, policy_version 8424 (0.0011) +[2024-07-05 16:19:28,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11732.6). Total num frames: 34504704. Throughput: 0: 2941.9. Samples: 3626042. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:28,942][04005] Avg episode reward: [(0, '46.832')] +[2024-07-05 16:19:32,102][04594] Updated weights for policy 0, policy_version 8434 (0.0011) +[2024-07-05 16:19:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 34566144. Throughput: 0: 2938.7. Samples: 3634904. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:33,943][04005] Avg episode reward: [(0, '45.388')] +[2024-07-05 16:19:35,603][04594] Updated weights for policy 0, policy_version 8444 (0.0011) +[2024-07-05 16:19:38,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 34623488. Throughput: 0: 2938.4. Samples: 3652436. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:38,942][04005] Avg episode reward: [(0, '47.085')] +[2024-07-05 16:19:39,072][04594] Updated weights for policy 0, policy_version 8454 (0.0011) +[2024-07-05 16:19:42,578][04594] Updated weights for policy 0, policy_version 8464 (0.0012) +[2024-07-05 16:19:43,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 34680832. Throughput: 0: 2941.9. Samples: 3670122. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:43,942][04005] Avg episode reward: [(0, '47.124')] +[2024-07-05 16:19:46,055][04594] Updated weights for policy 0, policy_version 8474 (0.0011) +[2024-07-05 16:19:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 34742272. Throughput: 0: 2937.8. Samples: 3678962. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:48,943][04005] Avg episode reward: [(0, '49.370')] +[2024-07-05 16:19:49,542][04594] Updated weights for policy 0, policy_version 8484 (0.0011) +[2024-07-05 16:19:53,026][04594] Updated weights for policy 0, policy_version 8494 (0.0011) +[2024-07-05 16:19:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 34799616. Throughput: 0: 2938.3. Samples: 3696508. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:53,942][04005] Avg episode reward: [(0, '48.300')] +[2024-07-05 16:19:56,517][04594] Updated weights for policy 0, policy_version 8504 (0.0012) +[2024-07-05 16:19:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 34856960. Throughput: 0: 2944.4. Samples: 3714318. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:19:58,942][04005] Avg episode reward: [(0, '48.422')] +[2024-07-05 16:20:00,007][04594] Updated weights for policy 0, policy_version 8514 (0.0011) +[2024-07-05 16:20:03,479][04594] Updated weights for policy 0, policy_version 8524 (0.0011) +[2024-07-05 16:20:03,942][04005] Fps is (10 sec: 11877.8, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 34918400. Throughput: 0: 2938.0. Samples: 3723016. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:03,943][04005] Avg episode reward: [(0, '46.459')] +[2024-07-05 16:20:06,960][04594] Updated weights for policy 0, policy_version 8534 (0.0011) +[2024-07-05 16:20:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 34975744. Throughput: 0: 2938.4. Samples: 3740576. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:08,942][04005] Avg episode reward: [(0, '46.055')] +[2024-07-05 16:20:10,450][04594] Updated weights for policy 0, policy_version 8544 (0.0011) +[2024-07-05 16:20:13,942][04005] Fps is (10 sec: 11469.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 35033088. Throughput: 0: 2940.3. Samples: 3758356. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:13,943][04005] Avg episode reward: [(0, '45.833')] +[2024-07-05 16:20:13,947][04594] Updated weights for policy 0, policy_version 8554 (0.0012) +[2024-07-05 16:20:17,426][04594] Updated weights for policy 0, policy_version 8564 (0.0011) +[2024-07-05 16:20:18,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 35094528. Throughput: 0: 2937.0. Samples: 3767070. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:18,942][04005] Avg episode reward: [(0, '46.972')] +[2024-07-05 16:20:20,893][04594] Updated weights for policy 0, policy_version 8574 (0.0011) +[2024-07-05 16:20:23,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 35151872. Throughput: 0: 2940.5. Samples: 3784758. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:23,942][04005] Avg episode reward: [(0, '46.905')] +[2024-07-05 16:20:24,365][04594] Updated weights for policy 0, policy_version 8584 (0.0011) +[2024-07-05 16:20:27,859][04594] Updated weights for policy 0, policy_version 8594 (0.0011) +[2024-07-05 16:20:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11774.3). Total num frames: 35213312. Throughput: 0: 2944.1. Samples: 3802606. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:28,942][04005] Avg episode reward: [(0, '47.395')] +[2024-07-05 16:20:31,347][04594] Updated weights for policy 0, policy_version 8604 (0.0015) +[2024-07-05 16:20:33,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11774.3). Total num frames: 35270656. Throughput: 0: 2937.6. Samples: 3811156. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:33,942][04005] Avg episode reward: [(0, '47.865')] +[2024-07-05 16:20:34,131][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000008612_35274752.pth... +[2024-07-05 16:20:34,203][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000007922_32448512.pth +[2024-07-05 16:20:34,830][04594] Updated weights for policy 0, policy_version 8614 (0.0012) +[2024-07-05 16:20:38,307][04594] Updated weights for policy 0, policy_version 8624 (0.0011) +[2024-07-05 16:20:38,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 35328000. Throughput: 0: 2942.9. Samples: 3828940. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:38,943][04005] Avg episode reward: [(0, '47.140')] +[2024-07-05 16:20:41,796][04594] Updated weights for policy 0, policy_version 8634 (0.0011) +[2024-07-05 16:20:43,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11774.3). Total num frames: 35389440. Throughput: 0: 2940.8. Samples: 3846654. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:43,942][04005] Avg episode reward: [(0, '45.214')] +[2024-07-05 16:20:45,297][04594] Updated weights for policy 0, policy_version 8644 (0.0012) +[2024-07-05 16:20:48,787][04594] Updated weights for policy 0, policy_version 8654 (0.0011) +[2024-07-05 16:20:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11774.3). Total num frames: 35446784. Throughput: 0: 2937.1. Samples: 3855184. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:48,942][04005] Avg episode reward: [(0, '45.763')] +[2024-07-05 16:20:52,277][04594] Updated weights for policy 0, policy_version 8664 (0.0011) +[2024-07-05 16:20:53,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 35504128. Throughput: 0: 2939.7. Samples: 3872862. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:53,942][04005] Avg episode reward: [(0, '47.209')] +[2024-07-05 16:20:55,752][04594] Updated weights for policy 0, policy_version 8674 (0.0012) +[2024-07-05 16:20:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11774.3). Total num frames: 35565568. Throughput: 0: 2940.1. Samples: 3890660. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:20:58,942][04005] Avg episode reward: [(0, '49.696')] +[2024-07-05 16:20:59,247][04594] Updated weights for policy 0, policy_version 8684 (0.0011) +[2024-07-05 16:21:02,733][04594] Updated weights for policy 0, policy_version 8694 (0.0011) +[2024-07-05 16:21:03,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11742.0, 300 sec: 11760.4). Total num frames: 35622912. Throughput: 0: 2936.5. Samples: 3899212. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:21:03,942][04005] Avg episode reward: [(0, '49.397')] +[2024-07-05 16:21:06,226][04594] Updated weights for policy 0, policy_version 8704 (0.0011) +[2024-07-05 16:21:08,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 35680256. Throughput: 0: 2937.1. Samples: 3916926. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:21:08,943][04005] Avg episode reward: [(0, '49.549')] +[2024-07-05 16:21:09,712][04594] Updated weights for policy 0, policy_version 8714 (0.0011) +[2024-07-05 16:21:13,197][04594] Updated weights for policy 0, policy_version 8724 (0.0011) +[2024-07-05 16:21:13,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11810.2, 300 sec: 11774.3). Total num frames: 35741696. Throughput: 0: 2934.8. Samples: 3934672. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:13,942][04005] Avg episode reward: [(0, '50.149')] +[2024-07-05 16:21:13,945][04581] Saving new best policy, reward=50.149! +[2024-07-05 16:21:16,687][04594] Updated weights for policy 0, policy_version 8734 (0.0011) +[2024-07-05 16:21:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 35799040. Throughput: 0: 2934.0. Samples: 3943188. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:18,942][04005] Avg episode reward: [(0, '50.233')] +[2024-07-05 16:21:19,118][04581] Saving new best policy, reward=50.233! +[2024-07-05 16:21:20,174][04594] Updated weights for policy 0, policy_version 8744 (0.0011) +[2024-07-05 16:21:23,661][04594] Updated weights for policy 0, policy_version 8754 (0.0013) +[2024-07-05 16:21:23,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 35856384. Throughput: 0: 2933.3. Samples: 3960938. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:23,942][04005] Avg episode reward: [(0, '51.516')] +[2024-07-05 16:21:24,004][04581] Saving new best policy, reward=51.516! +[2024-07-05 16:21:27,152][04594] Updated weights for policy 0, policy_version 8764 (0.0011) +[2024-07-05 16:21:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11774.3). Total num frames: 35917824. Throughput: 0: 2934.4. Samples: 3978702. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:28,942][04005] Avg episode reward: [(0, '50.440')] +[2024-07-05 16:21:30,637][04594] Updated weights for policy 0, policy_version 8774 (0.0012) +[2024-07-05 16:21:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 35975168. Throughput: 0: 2934.6. Samples: 3987242. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:33,942][04005] Avg episode reward: [(0, '52.027')] +[2024-07-05 16:21:34,125][04581] Saving new best policy, reward=52.027! +[2024-07-05 16:21:34,128][04594] Updated weights for policy 0, policy_version 8784 (0.0011) +[2024-07-05 16:21:37,619][04594] Updated weights for policy 0, policy_version 8794 (0.0012) +[2024-07-05 16:21:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36032512. Throughput: 0: 2934.6. Samples: 4004920. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:38,942][04005] Avg episode reward: [(0, '49.444')] +[2024-07-05 16:21:41,099][04594] Updated weights for policy 0, policy_version 8804 (0.0011) +[2024-07-05 16:21:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36093952. Throughput: 0: 2934.0. Samples: 4022690. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:43,942][04005] Avg episode reward: [(0, '48.956')] +[2024-07-05 16:21:44,585][04594] Updated weights for policy 0, policy_version 8814 (0.0011) +[2024-07-05 16:21:48,060][04594] Updated weights for policy 0, policy_version 8824 (0.0011) +[2024-07-05 16:21:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36151296. Throughput: 0: 2934.7. Samples: 4031272. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:48,942][04005] Avg episode reward: [(0, '46.199')] +[2024-07-05 16:21:51,558][04594] Updated weights for policy 0, policy_version 8834 (0.0011) +[2024-07-05 16:21:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36208640. Throughput: 0: 2936.7. Samples: 4049076. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:53,942][04005] Avg episode reward: [(0, '47.576')] +[2024-07-05 16:21:55,037][04594] Updated weights for policy 0, policy_version 8844 (0.0011) +[2024-07-05 16:21:58,504][04594] Updated weights for policy 0, policy_version 8854 (0.0011) +[2024-07-05 16:21:58,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36270080. Throughput: 0: 2935.5. Samples: 4066770. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:21:58,942][04005] Avg episode reward: [(0, '47.814')] +[2024-07-05 16:22:01,983][04594] Updated weights for policy 0, policy_version 8864 (0.0011) +[2024-07-05 16:22:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36327424. Throughput: 0: 2939.5. Samples: 4075468. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:22:03,943][04005] Avg episode reward: [(0, '48.085')] +[2024-07-05 16:22:05,455][04594] Updated weights for policy 0, policy_version 8874 (0.0011) +[2024-07-05 16:22:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36384768. Throughput: 0: 2940.9. Samples: 4093280. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:22:08,942][04005] Avg episode reward: [(0, '48.632')] +[2024-07-05 16:22:08,947][04594] Updated weights for policy 0, policy_version 8884 (0.0012) +[2024-07-05 16:22:12,435][04594] Updated weights for policy 0, policy_version 8894 (0.0011) +[2024-07-05 16:22:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36446208. Throughput: 0: 2935.5. Samples: 4110798. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:22:13,942][04005] Avg episode reward: [(0, '47.280')] +[2024-07-05 16:22:15,926][04594] Updated weights for policy 0, policy_version 8904 (0.0011) +[2024-07-05 16:22:18,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 36503552. Throughput: 0: 2940.0. Samples: 4119542. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:22:18,943][04005] Avg episode reward: [(0, '48.050')] +[2024-07-05 16:22:19,409][04594] Updated weights for policy 0, policy_version 8914 (0.0011) +[2024-07-05 16:22:22,899][04594] Updated weights for policy 0, policy_version 8924 (0.0011) +[2024-07-05 16:22:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 36564992. Throughput: 0: 2942.3. Samples: 4137322. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:22:23,942][04005] Avg episode reward: [(0, '48.158')] +[2024-07-05 16:22:26,379][04594] Updated weights for policy 0, policy_version 8934 (0.0011) +[2024-07-05 16:22:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36622336. Throughput: 0: 2937.5. Samples: 4154878. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:22:28,942][04005] Avg episode reward: [(0, '49.922')] +[2024-07-05 16:22:29,856][04594] Updated weights for policy 0, policy_version 8944 (0.0011) +[2024-07-05 16:22:33,340][04594] Updated weights for policy 0, policy_version 8954 (0.0011) +[2024-07-05 16:22:33,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36679680. Throughput: 0: 2942.6. Samples: 4163688. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:22:33,942][04005] Avg episode reward: [(0, '51.462')] +[2024-07-05 16:22:34,037][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000008956_36683776.pth... +[2024-07-05 16:22:34,107][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000008267_33861632.pth +[2024-07-05 16:22:36,824][04594] Updated weights for policy 0, policy_version 8964 (0.0011) +[2024-07-05 16:22:38,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 36741120. Throughput: 0: 2940.0. Samples: 4181376. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:22:38,942][04005] Avg episode reward: [(0, '49.611')] +[2024-07-05 16:22:40,326][04594] Updated weights for policy 0, policy_version 8974 (0.0012) +[2024-07-05 16:22:43,810][04594] Updated weights for policy 0, policy_version 8984 (0.0011) +[2024-07-05 16:22:43,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36798464. Throughput: 0: 2936.8. Samples: 4198928. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:22:43,942][04005] Avg episode reward: [(0, '48.276')] +[2024-07-05 16:22:47,285][04594] Updated weights for policy 0, policy_version 8994 (0.0011) +[2024-07-05 16:22:48,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 36855808. Throughput: 0: 2940.3. Samples: 4207780. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:22:48,943][04005] Avg episode reward: [(0, '47.943')] +[2024-07-05 16:22:50,761][04594] Updated weights for policy 0, policy_version 9004 (0.0011) +[2024-07-05 16:22:53,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 36917248. Throughput: 0: 2936.0. Samples: 4225402. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:22:53,942][04005] Avg episode reward: [(0, '49.359')] +[2024-07-05 16:22:54,264][04594] Updated weights for policy 0, policy_version 9014 (0.0011) +[2024-07-05 16:22:57,734][04594] Updated weights for policy 0, policy_version 9024 (0.0011) +[2024-07-05 16:22:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 36974592. Throughput: 0: 2937.2. Samples: 4242972. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:22:58,942][04005] Avg episode reward: [(0, '50.127')] +[2024-07-05 16:23:01,220][04594] Updated weights for policy 0, policy_version 9034 (0.0011) +[2024-07-05 16:23:03,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 37031936. Throughput: 0: 2942.0. Samples: 4251930. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:03,943][04005] Avg episode reward: [(0, '49.385')] +[2024-07-05 16:23:04,696][04594] Updated weights for policy 0, policy_version 9044 (0.0011) +[2024-07-05 16:23:08,174][04594] Updated weights for policy 0, policy_version 9054 (0.0011) +[2024-07-05 16:23:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 37093376. Throughput: 0: 2936.8. Samples: 4269476. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:08,942][04005] Avg episode reward: [(0, '48.462')] +[2024-07-05 16:23:11,674][04594] Updated weights for policy 0, policy_version 9064 (0.0011) +[2024-07-05 16:23:13,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 37150720. Throughput: 0: 2935.9. Samples: 4286994. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:13,942][04005] Avg episode reward: [(0, '48.973')] +[2024-07-05 16:23:15,143][04594] Updated weights for policy 0, policy_version 9074 (0.0011) +[2024-07-05 16:23:18,621][04594] Updated weights for policy 0, policy_version 9084 (0.0011) +[2024-07-05 16:23:18,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 37208064. Throughput: 0: 2940.6. Samples: 4296014. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:18,942][04005] Avg episode reward: [(0, '49.416')] +[2024-07-05 16:23:22,098][04594] Updated weights for policy 0, policy_version 9094 (0.0011) +[2024-07-05 16:23:23,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37269504. Throughput: 0: 2937.1. Samples: 4313546. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:23,943][04005] Avg episode reward: [(0, '52.101')] +[2024-07-05 16:23:24,195][04581] Saving new best policy, reward=52.101! +[2024-07-05 16:23:25,601][04594] Updated weights for policy 0, policy_version 9104 (0.0012) +[2024-07-05 16:23:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37326848. Throughput: 0: 2936.8. Samples: 4331084. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:28,942][04005] Avg episode reward: [(0, '51.577')] +[2024-07-05 16:23:29,080][04594] Updated weights for policy 0, policy_version 9114 (0.0013) +[2024-07-05 16:23:32,563][04594] Updated weights for policy 0, policy_version 9124 (0.0011) +[2024-07-05 16:23:33,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 37384192. Throughput: 0: 2939.8. Samples: 4340072. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:33,942][04005] Avg episode reward: [(0, '49.821')] +[2024-07-05 16:23:36,054][04594] Updated weights for policy 0, policy_version 9134 (0.0011) +[2024-07-05 16:23:38,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37445632. Throughput: 0: 2938.4. Samples: 4357630. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:38,942][04005] Avg episode reward: [(0, '49.233')] +[2024-07-05 16:23:39,537][04594] Updated weights for policy 0, policy_version 9144 (0.0011) +[2024-07-05 16:23:43,012][04594] Updated weights for policy 0, policy_version 9154 (0.0011) +[2024-07-05 16:23:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37502976. Throughput: 0: 2937.9. Samples: 4375176. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:43,942][04005] Avg episode reward: [(0, '49.435')] +[2024-07-05 16:23:46,498][04594] Updated weights for policy 0, policy_version 9164 (0.0011) +[2024-07-05 16:23:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 37564416. Throughput: 0: 2938.1. Samples: 4384146. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:48,942][04005] Avg episode reward: [(0, '50.496')] +[2024-07-05 16:23:49,982][04594] Updated weights for policy 0, policy_version 9174 (0.0011) +[2024-07-05 16:23:53,461][04594] Updated weights for policy 0, policy_version 9184 (0.0012) +[2024-07-05 16:23:53,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37621760. Throughput: 0: 2939.1. Samples: 4401736. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:53,944][04005] Avg episode reward: [(0, '50.089')] +[2024-07-05 16:23:56,950][04594] Updated weights for policy 0, policy_version 9194 (0.0013) +[2024-07-05 16:23:58,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 37679104. Throughput: 0: 2938.4. Samples: 4419224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:23:58,943][04005] Avg episode reward: [(0, '49.250')] +[2024-07-05 16:24:00,429][04594] Updated weights for policy 0, policy_version 9204 (0.0011) +[2024-07-05 16:24:03,923][04594] Updated weights for policy 0, policy_version 9214 (0.0011) +[2024-07-05 16:24:03,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 37740544. Throughput: 0: 2938.3. Samples: 4428236. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:03,942][04005] Avg episode reward: [(0, '48.943')] +[2024-07-05 16:24:07,402][04594] Updated weights for policy 0, policy_version 9224 (0.0013) +[2024-07-05 16:24:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37797888. Throughput: 0: 2937.7. Samples: 4445744. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:08,942][04005] Avg episode reward: [(0, '49.042')] +[2024-07-05 16:24:10,901][04594] Updated weights for policy 0, policy_version 9234 (0.0012) +[2024-07-05 16:24:13,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 37855232. Throughput: 0: 2937.6. Samples: 4463278. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:13,942][04005] Avg episode reward: [(0, '48.452')] +[2024-07-05 16:24:14,390][04594] Updated weights for policy 0, policy_version 9244 (0.0014) +[2024-07-05 16:24:17,883][04594] Updated weights for policy 0, policy_version 9254 (0.0011) +[2024-07-05 16:24:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 37916672. Throughput: 0: 2936.9. Samples: 4472232. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:18,942][04005] Avg episode reward: [(0, '48.075')] +[2024-07-05 16:24:21,364][04594] Updated weights for policy 0, policy_version 9264 (0.0012) +[2024-07-05 16:24:23,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 37974016. Throughput: 0: 2936.8. Samples: 4489788. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:23,942][04005] Avg episode reward: [(0, '49.301')] +[2024-07-05 16:24:24,872][04594] Updated weights for policy 0, policy_version 9274 (0.0011) +[2024-07-05 16:24:28,347][04594] Updated weights for policy 0, policy_version 9284 (0.0011) +[2024-07-05 16:24:28,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 38031360. Throughput: 0: 2937.2. Samples: 4507348. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:28,942][04005] Avg episode reward: [(0, '49.878')] +[2024-07-05 16:24:31,821][04594] Updated weights for policy 0, policy_version 9294 (0.0011) +[2024-07-05 16:24:33,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 38092800. Throughput: 0: 2938.1. Samples: 4516360. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:33,942][04005] Avg episode reward: [(0, '49.788')] +[2024-07-05 16:24:33,945][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000009300_38092800.pth... +[2024-07-05 16:24:34,018][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000008612_35274752.pth +[2024-07-05 16:24:35,310][04594] Updated weights for policy 0, policy_version 9304 (0.0012) +[2024-07-05 16:24:38,797][04594] Updated weights for policy 0, policy_version 9314 (0.0011) +[2024-07-05 16:24:38,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 38150144. Throughput: 0: 2936.0. Samples: 4533858. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:38,942][04005] Avg episode reward: [(0, '50.099')] +[2024-07-05 16:24:42,292][04594] Updated weights for policy 0, policy_version 9324 (0.0011) +[2024-07-05 16:24:43,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 38207488. Throughput: 0: 2937.1. Samples: 4551392. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:24:43,942][04005] Avg episode reward: [(0, '51.074')] +[2024-07-05 16:24:45,768][04594] Updated weights for policy 0, policy_version 9334 (0.0011) +[2024-07-05 16:24:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 38268928. Throughput: 0: 2937.2. Samples: 4560408. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:24:48,942][04005] Avg episode reward: [(0, '51.541')] +[2024-07-05 16:24:49,266][04594] Updated weights for policy 0, policy_version 9344 (0.0011) +[2024-07-05 16:24:52,739][04594] Updated weights for policy 0, policy_version 9354 (0.0011) +[2024-07-05 16:24:53,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 38326272. Throughput: 0: 2937.7. Samples: 4577940. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:24:53,942][04005] Avg episode reward: [(0, '51.766')] +[2024-07-05 16:24:56,226][04594] Updated weights for policy 0, policy_version 9364 (0.0011) +[2024-07-05 16:24:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 38383616. Throughput: 0: 2937.4. Samples: 4595460. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:24:58,942][04005] Avg episode reward: [(0, '49.415')] +[2024-07-05 16:24:59,702][04594] Updated weights for policy 0, policy_version 9374 (0.0011) +[2024-07-05 16:25:03,179][04594] Updated weights for policy 0, policy_version 9384 (0.0011) +[2024-07-05 16:25:03,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 38445056. Throughput: 0: 2938.8. Samples: 4604476. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:03,942][04005] Avg episode reward: [(0, '49.255')] +[2024-07-05 16:25:06,655][04594] Updated weights for policy 0, policy_version 9394 (0.0011) +[2024-07-05 16:25:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 38502400. Throughput: 0: 2939.0. Samples: 4622042. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:08,943][04005] Avg episode reward: [(0, '49.161')] +[2024-07-05 16:25:10,147][04594] Updated weights for policy 0, policy_version 9404 (0.0011) +[2024-07-05 16:25:13,636][04594] Updated weights for policy 0, policy_version 9414 (0.0011) +[2024-07-05 16:25:13,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 38559744. Throughput: 0: 2939.7. Samples: 4639636. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:13,942][04005] Avg episode reward: [(0, '50.114')] +[2024-07-05 16:25:17,121][04594] Updated weights for policy 0, policy_version 9424 (0.0011) +[2024-07-05 16:25:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 38621184. Throughput: 0: 2937.6. Samples: 4648554. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:18,942][04005] Avg episode reward: [(0, '50.532')] +[2024-07-05 16:25:20,604][04594] Updated weights for policy 0, policy_version 9434 (0.0013) +[2024-07-05 16:25:23,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 38678528. Throughput: 0: 2938.0. Samples: 4666066. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:23,943][04005] Avg episode reward: [(0, '49.455')] +[2024-07-05 16:25:24,083][04594] Updated weights for policy 0, policy_version 9444 (0.0011) +[2024-07-05 16:25:27,572][04594] Updated weights for policy 0, policy_version 9454 (0.0011) +[2024-07-05 16:25:28,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 38735872. Throughput: 0: 2942.4. Samples: 4683800. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:28,942][04005] Avg episode reward: [(0, '46.931')] +[2024-07-05 16:25:31,053][04594] Updated weights for policy 0, policy_version 9464 (0.0011) +[2024-07-05 16:25:33,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 38797312. Throughput: 0: 2937.2. Samples: 4692584. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:33,943][04005] Avg episode reward: [(0, '45.696')] +[2024-07-05 16:25:34,531][04594] Updated weights for policy 0, policy_version 9474 (0.0011) +[2024-07-05 16:25:38,004][04594] Updated weights for policy 0, policy_version 9484 (0.0011) +[2024-07-05 16:25:38,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 38854656. Throughput: 0: 2937.6. Samples: 4710130. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:38,942][04005] Avg episode reward: [(0, '44.838')] +[2024-07-05 16:25:41,502][04594] Updated weights for policy 0, policy_version 9494 (0.0011) +[2024-07-05 16:25:43,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 38916096. Throughput: 0: 2945.7. Samples: 4728018. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:43,942][04005] Avg episode reward: [(0, '46.614')] +[2024-07-05 16:25:44,982][04594] Updated weights for policy 0, policy_version 9504 (0.0013) +[2024-07-05 16:25:48,454][04594] Updated weights for policy 0, policy_version 9514 (0.0011) +[2024-07-05 16:25:48,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 38973440. Throughput: 0: 2938.4. Samples: 4736706. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:48,943][04005] Avg episode reward: [(0, '48.443')] +[2024-07-05 16:25:51,939][04594] Updated weights for policy 0, policy_version 9524 (0.0012) +[2024-07-05 16:25:53,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 39030784. Throughput: 0: 2940.0. Samples: 4754342. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:53,942][04005] Avg episode reward: [(0, '48.767')] +[2024-07-05 16:25:55,415][04594] Updated weights for policy 0, policy_version 9534 (0.0011) +[2024-07-05 16:25:58,911][04594] Updated weights for policy 0, policy_version 9544 (0.0012) +[2024-07-05 16:25:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 39092224. Throughput: 0: 2945.9. Samples: 4772202. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:25:58,942][04005] Avg episode reward: [(0, '50.872')] +[2024-07-05 16:26:02,386][04594] Updated weights for policy 0, policy_version 9554 (0.0011) +[2024-07-05 16:26:03,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39149568. Throughput: 0: 2938.9. Samples: 4780806. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:03,943][04005] Avg episode reward: [(0, '50.645')] +[2024-07-05 16:26:05,872][04594] Updated weights for policy 0, policy_version 9564 (0.0011) +[2024-07-05 16:26:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 39206912. Throughput: 0: 2944.4. Samples: 4798562. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:08,942][04005] Avg episode reward: [(0, '51.197')] +[2024-07-05 16:26:09,351][04594] Updated weights for policy 0, policy_version 9574 (0.0011) +[2024-07-05 16:26:12,849][04594] Updated weights for policy 0, policy_version 9584 (0.0011) +[2024-07-05 16:26:13,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 39268352. Throughput: 0: 2943.7. Samples: 4816268. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:13,942][04005] Avg episode reward: [(0, '50.613')] +[2024-07-05 16:26:16,323][04594] Updated weights for policy 0, policy_version 9594 (0.0011) +[2024-07-05 16:26:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39325696. Throughput: 0: 2938.7. Samples: 4824826. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:18,942][04005] Avg episode reward: [(0, '48.845')] +[2024-07-05 16:26:19,797][04594] Updated weights for policy 0, policy_version 9604 (0.0012) +[2024-07-05 16:26:23,292][04594] Updated weights for policy 0, policy_version 9614 (0.0011) +[2024-07-05 16:26:23,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 39383040. Throughput: 0: 2945.1. Samples: 4842658. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:23,943][04005] Avg episode reward: [(0, '45.157')] +[2024-07-05 16:26:26,767][04594] Updated weights for policy 0, policy_version 9624 (0.0013) +[2024-07-05 16:26:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 39444480. Throughput: 0: 2940.2. Samples: 4860326. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:28,942][04005] Avg episode reward: [(0, '46.111')] +[2024-07-05 16:26:30,252][04594] Updated weights for policy 0, policy_version 9634 (0.0011) +[2024-07-05 16:26:33,730][04594] Updated weights for policy 0, policy_version 9644 (0.0011) +[2024-07-05 16:26:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39501824. Throughput: 0: 2939.4. Samples: 4868980. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:26:33,943][04005] Avg episode reward: [(0, '46.749')] +[2024-07-05 16:26:34,075][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000009645_39505920.pth... +[2024-07-05 16:26:34,147][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000008956_36683776.pth +[2024-07-05 16:26:37,241][04594] Updated weights for policy 0, policy_version 9654 (0.0011) +[2024-07-05 16:26:38,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 39559168. Throughput: 0: 2942.5. Samples: 4886754. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:38,943][04005] Avg episode reward: [(0, '48.925')] +[2024-07-05 16:26:40,721][04594] Updated weights for policy 0, policy_version 9664 (0.0013) +[2024-07-05 16:26:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39620608. Throughput: 0: 2936.5. Samples: 4904346. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:26:43,942][04005] Avg episode reward: [(0, '46.931')] +[2024-07-05 16:26:44,205][04594] Updated weights for policy 0, policy_version 9674 (0.0011) +[2024-07-05 16:26:47,686][04594] Updated weights for policy 0, policy_version 9684 (0.0011) +[2024-07-05 16:26:48,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39677952. Throughput: 0: 2936.5. Samples: 4912948. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:26:48,943][04005] Avg episode reward: [(0, '46.808')] +[2024-07-05 16:26:51,171][04594] Updated weights for policy 0, policy_version 9694 (0.0013) +[2024-07-05 16:26:53,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 39735296. Throughput: 0: 2940.0. Samples: 4930862. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:26:53,942][04005] Avg episode reward: [(0, '49.176')] +[2024-07-05 16:26:54,663][04594] Updated weights for policy 0, policy_version 9704 (0.0012) +[2024-07-05 16:26:58,135][04594] Updated weights for policy 0, policy_version 9714 (0.0012) +[2024-07-05 16:26:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39796736. Throughput: 0: 2936.4. Samples: 4948406. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:26:58,942][04005] Avg episode reward: [(0, '50.578')] +[2024-07-05 16:27:01,617][04594] Updated weights for policy 0, policy_version 9724 (0.0011) +[2024-07-05 16:27:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 39854080. Throughput: 0: 2941.0. Samples: 4957170. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:03,942][04005] Avg episode reward: [(0, '48.457')] +[2024-07-05 16:27:05,098][04594] Updated weights for policy 0, policy_version 9734 (0.0011) +[2024-07-05 16:27:08,577][04594] Updated weights for policy 0, policy_version 9744 (0.0011) +[2024-07-05 16:27:08,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 39915520. Throughput: 0: 2939.3. Samples: 4974926. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:08,942][04005] Avg episode reward: [(0, '49.329')] +[2024-07-05 16:27:12,081][04594] Updated weights for policy 0, policy_version 9754 (0.0011) +[2024-07-05 16:27:13,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 39972864. Throughput: 0: 2935.8. Samples: 4992436. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:13,942][04005] Avg episode reward: [(0, '50.260')] +[2024-07-05 16:27:15,569][04594] Updated weights for policy 0, policy_version 9764 (0.0011) +[2024-07-05 16:27:18,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 40030208. Throughput: 0: 2939.1. Samples: 5001240. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:18,942][04005] Avg episode reward: [(0, '50.302')] +[2024-07-05 16:27:19,046][04594] Updated weights for policy 0, policy_version 9774 (0.0013) +[2024-07-05 16:27:22,522][04594] Updated weights for policy 0, policy_version 9784 (0.0013) +[2024-07-05 16:27:23,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 40091648. Throughput: 0: 2938.8. Samples: 5018998. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:23,942][04005] Avg episode reward: [(0, '48.885')] +[2024-07-05 16:27:25,996][04594] Updated weights for policy 0, policy_version 9794 (0.0012) +[2024-07-05 16:27:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 40148992. Throughput: 0: 2937.8. Samples: 5036548. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:28,942][04005] Avg episode reward: [(0, '48.681')] +[2024-07-05 16:27:29,477][04594] Updated weights for policy 0, policy_version 9804 (0.0011) +[2024-07-05 16:27:32,961][04594] Updated weights for policy 0, policy_version 9814 (0.0011) +[2024-07-05 16:27:33,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 40206336. Throughput: 0: 2946.3. Samples: 5045534. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:33,943][04005] Avg episode reward: [(0, '49.883')] +[2024-07-05 16:27:36,433][04594] Updated weights for policy 0, policy_version 9824 (0.0013) +[2024-07-05 16:27:38,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 40267776. Throughput: 0: 2939.0. Samples: 5063118. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:38,942][04005] Avg episode reward: [(0, '50.528')] +[2024-07-05 16:27:39,948][04594] Updated weights for policy 0, policy_version 9834 (0.0012) +[2024-07-05 16:27:43,427][04594] Updated weights for policy 0, policy_version 9844 (0.0011) +[2024-07-05 16:27:43,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 40325120. Throughput: 0: 2938.1. Samples: 5080620. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:43,942][04005] Avg episode reward: [(0, '50.736')] +[2024-07-05 16:27:46,912][04594] Updated weights for policy 0, policy_version 9854 (0.0011) +[2024-07-05 16:27:48,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 40382464. Throughput: 0: 2943.1. Samples: 5089610. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:48,942][04005] Avg episode reward: [(0, '50.498')] +[2024-07-05 16:27:50,393][04594] Updated weights for policy 0, policy_version 9864 (0.0011) +[2024-07-05 16:27:53,861][04594] Updated weights for policy 0, policy_version 9874 (0.0011) +[2024-07-05 16:27:53,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 40443904. Throughput: 0: 2938.5. Samples: 5107158. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:27:53,942][04005] Avg episode reward: [(0, '52.749')] +[2024-07-05 16:27:53,945][04581] Saving new best policy, reward=52.749! +[2024-07-05 16:27:57,364][04594] Updated weights for policy 0, policy_version 9884 (0.0011) +[2024-07-05 16:27:58,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 40501248. Throughput: 0: 2939.4. Samples: 5124710. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:27:58,943][04005] Avg episode reward: [(0, '53.010')] +[2024-07-05 16:27:59,091][04581] Saving new best policy, reward=53.010! +[2024-07-05 16:28:00,850][04594] Updated weights for policy 0, policy_version 9894 (0.0011) +[2024-07-05 16:28:03,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 40558592. Throughput: 0: 2942.9. Samples: 5133668. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-07-05 16:28:03,942][04005] Avg episode reward: [(0, '51.865')] +[2024-07-05 16:28:04,327][04594] Updated weights for policy 0, policy_version 9904 (0.0011) +[2024-07-05 16:28:07,809][04594] Updated weights for policy 0, policy_version 9914 (0.0011) +[2024-07-05 16:28:08,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 40620032. Throughput: 0: 2938.2. Samples: 5151218. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:08,942][04005] Avg episode reward: [(0, '51.416')] +[2024-07-05 16:28:11,309][04594] Updated weights for policy 0, policy_version 9924 (0.0011) +[2024-07-05 16:28:13,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 40677376. Throughput: 0: 2938.0. Samples: 5168756. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:13,943][04005] Avg episode reward: [(0, '50.699')] +[2024-07-05 16:28:14,780][04594] Updated weights for policy 0, policy_version 9934 (0.0011) +[2024-07-05 16:28:18,280][04594] Updated weights for policy 0, policy_version 9944 (0.0011) +[2024-07-05 16:28:18,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 40734720. Throughput: 0: 2936.9. Samples: 5177694. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:18,943][04005] Avg episode reward: [(0, '51.029')] +[2024-07-05 16:28:21,759][04594] Updated weights for policy 0, policy_version 9954 (0.0011) +[2024-07-05 16:28:23,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 40796160. Throughput: 0: 2936.7. Samples: 5195268. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:23,943][04005] Avg episode reward: [(0, '51.319')] +[2024-07-05 16:28:25,243][04594] Updated weights for policy 0, policy_version 9964 (0.0012) +[2024-07-05 16:28:28,716][04594] Updated weights for policy 0, policy_version 9974 (0.0012) +[2024-07-05 16:28:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 40853504. Throughput: 0: 2937.5. Samples: 5212808. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:28,942][04005] Avg episode reward: [(0, '50.038')] +[2024-07-05 16:28:32,195][04594] Updated weights for policy 0, policy_version 9984 (0.0012) +[2024-07-05 16:28:33,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 40914944. Throughput: 0: 2938.0. Samples: 5221818. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:33,942][04005] Avg episode reward: [(0, '50.502')] +[2024-07-05 16:28:33,945][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000009989_40914944.pth... +[2024-07-05 16:28:34,019][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000009300_38092800.pth +[2024-07-05 16:28:35,691][04594] Updated weights for policy 0, policy_version 9994 (0.0011) +[2024-07-05 16:28:38,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 40972288. Throughput: 0: 2936.9. Samples: 5239318. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:38,942][04005] Avg episode reward: [(0, '50.696')] +[2024-07-05 16:28:39,170][04594] Updated weights for policy 0, policy_version 10004 (0.0011) +[2024-07-05 16:28:42,665][04594] Updated weights for policy 0, policy_version 10014 (0.0012) +[2024-07-05 16:28:43,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 41029632. Throughput: 0: 2936.5. Samples: 5256852. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:43,942][04005] Avg episode reward: [(0, '52.575')] +[2024-07-05 16:28:46,140][04594] Updated weights for policy 0, policy_version 10024 (0.0011) +[2024-07-05 16:28:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 41091072. Throughput: 0: 2937.2. Samples: 5265840. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:48,942][04005] Avg episode reward: [(0, '52.598')] +[2024-07-05 16:28:49,624][04594] Updated weights for policy 0, policy_version 10034 (0.0013) +[2024-07-05 16:28:53,107][04594] Updated weights for policy 0, policy_version 10044 (0.0011) +[2024-07-05 16:28:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 41148416. Throughput: 0: 2937.2. Samples: 5283390. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:53,942][04005] Avg episode reward: [(0, '52.393')] +[2024-07-05 16:28:56,595][04594] Updated weights for policy 0, policy_version 10054 (0.0012) +[2024-07-05 16:28:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 41205760. Throughput: 0: 2936.9. Samples: 5300918. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:28:58,942][04005] Avg episode reward: [(0, '51.448')] +[2024-07-05 16:29:00,069][04594] Updated weights for policy 0, policy_version 10064 (0.0011) +[2024-07-05 16:29:03,558][04594] Updated weights for policy 0, policy_version 10074 (0.0011) +[2024-07-05 16:29:03,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 41267200. Throughput: 0: 2938.7. Samples: 5309934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:03,942][04005] Avg episode reward: [(0, '50.519')] +[2024-07-05 16:29:07,035][04594] Updated weights for policy 0, policy_version 10084 (0.0011) +[2024-07-05 16:29:08,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 41324544. Throughput: 0: 2938.3. Samples: 5327494. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:08,943][04005] Avg episode reward: [(0, '49.900')] +[2024-07-05 16:29:10,521][04594] Updated weights for policy 0, policy_version 10094 (0.0012) +[2024-07-05 16:29:13,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 41381888. Throughput: 0: 2936.9. Samples: 5344968. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:13,943][04005] Avg episode reward: [(0, '50.178')] +[2024-07-05 16:29:14,014][04594] Updated weights for policy 0, policy_version 10104 (0.0011) +[2024-07-05 16:29:17,486][04594] Updated weights for policy 0, policy_version 10114 (0.0012) +[2024-07-05 16:29:18,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 41443328. Throughput: 0: 2936.6. Samples: 5353966. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:18,942][04005] Avg episode reward: [(0, '50.181')] +[2024-07-05 16:29:20,961][04594] Updated weights for policy 0, policy_version 10124 (0.0013) +[2024-07-05 16:29:23,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 41500672. Throughput: 0: 2938.3. Samples: 5371540. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:23,943][04005] Avg episode reward: [(0, '49.918')] +[2024-07-05 16:29:24,440][04594] Updated weights for policy 0, policy_version 10134 (0.0013) +[2024-07-05 16:29:27,918][04594] Updated weights for policy 0, policy_version 10144 (0.0012) +[2024-07-05 16:29:28,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 41558016. Throughput: 0: 2942.8. Samples: 5389278. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:28,942][04005] Avg episode reward: [(0, '48.692')] +[2024-07-05 16:29:31,436][04594] Updated weights for policy 0, policy_version 10154 (0.0011) +[2024-07-05 16:29:33,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 41619456. Throughput: 0: 2937.5. Samples: 5398028. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:33,942][04005] Avg episode reward: [(0, '49.097')] +[2024-07-05 16:29:34,937][04594] Updated weights for policy 0, policy_version 10164 (0.0011) +[2024-07-05 16:29:38,428][04594] Updated weights for policy 0, policy_version 10174 (0.0013) +[2024-07-05 16:29:38,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 41676800. Throughput: 0: 2936.5. Samples: 5415534. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:38,943][04005] Avg episode reward: [(0, '50.047')] +[2024-07-05 16:29:41,926][04594] Updated weights for policy 0, policy_version 10184 (0.0012) +[2024-07-05 16:29:43,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 41734144. Throughput: 0: 2936.3. Samples: 5433052. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:43,942][04005] Avg episode reward: [(0, '48.867')] +[2024-07-05 16:29:45,407][04594] Updated weights for policy 0, policy_version 10194 (0.0011) +[2024-07-05 16:29:48,896][04594] Updated weights for policy 0, policy_version 10204 (0.0011) +[2024-07-05 16:29:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 41795584. Throughput: 0: 2936.4. Samples: 5442072. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:48,942][04005] Avg episode reward: [(0, '49.818')] +[2024-07-05 16:29:52,382][04594] Updated weights for policy 0, policy_version 10214 (0.0011) +[2024-07-05 16:29:53,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 41852928. Throughput: 0: 2934.5. Samples: 5459546. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:53,942][04005] Avg episode reward: [(0, '49.389')] +[2024-07-05 16:29:55,874][04594] Updated weights for policy 0, policy_version 10224 (0.0011) +[2024-07-05 16:29:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 41910272. Throughput: 0: 2936.0. Samples: 5477086. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:29:58,942][04005] Avg episode reward: [(0, '48.155')] +[2024-07-05 16:29:59,352][04594] Updated weights for policy 0, policy_version 10234 (0.0012) +[2024-07-05 16:30:02,821][04594] Updated weights for policy 0, policy_version 10244 (0.0011) +[2024-07-05 16:30:03,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 41971712. Throughput: 0: 2936.6. Samples: 5486112. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:03,943][04005] Avg episode reward: [(0, '45.773')] +[2024-07-05 16:30:06,298][04594] Updated weights for policy 0, policy_version 10254 (0.0013) +[2024-07-05 16:30:08,942][04005] Fps is (10 sec: 11877.9, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 42029056. Throughput: 0: 2935.8. Samples: 5503650. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:08,943][04005] Avg episode reward: [(0, '44.966')] +[2024-07-05 16:30:09,787][04594] Updated weights for policy 0, policy_version 10264 (0.0011) +[2024-07-05 16:30:13,276][04594] Updated weights for policy 0, policy_version 10274 (0.0011) +[2024-07-05 16:30:13,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42086400. Throughput: 0: 2933.8. Samples: 5521300. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:13,943][04005] Avg episode reward: [(0, '47.862')] +[2024-07-05 16:30:16,754][04594] Updated weights for policy 0, policy_version 10284 (0.0011) +[2024-07-05 16:30:18,942][04005] Fps is (10 sec: 11878.8, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 42147840. Throughput: 0: 2935.7. Samples: 5530134. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:18,942][04005] Avg episode reward: [(0, '49.156')] +[2024-07-05 16:30:20,252][04594] Updated weights for policy 0, policy_version 10294 (0.0013) +[2024-07-05 16:30:23,735][04594] Updated weights for policy 0, policy_version 10304 (0.0012) +[2024-07-05 16:30:23,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 42205184. Throughput: 0: 2936.0. Samples: 5547656. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:23,942][04005] Avg episode reward: [(0, '50.223')] +[2024-07-05 16:30:27,230][04594] Updated weights for policy 0, policy_version 10314 (0.0011) +[2024-07-05 16:30:28,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42262528. Throughput: 0: 2940.8. Samples: 5565386. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:28,942][04005] Avg episode reward: [(0, '49.594')] +[2024-07-05 16:30:30,710][04594] Updated weights for policy 0, policy_version 10324 (0.0011) +[2024-07-05 16:30:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 42323968. Throughput: 0: 2935.7. Samples: 5574180. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:33,942][04005] Avg episode reward: [(0, '48.195')] +[2024-07-05 16:30:34,192][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000010334_42328064.pth... +[2024-07-05 16:30:34,194][04594] Updated weights for policy 0, policy_version 10334 (0.0011) +[2024-07-05 16:30:34,263][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000009645_39505920.pth +[2024-07-05 16:30:37,686][04594] Updated weights for policy 0, policy_version 10344 (0.0012) +[2024-07-05 16:30:38,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42381312. Throughput: 0: 2937.1. Samples: 5591716. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:38,943][04005] Avg episode reward: [(0, '46.474')] +[2024-07-05 16:30:41,185][04594] Updated weights for policy 0, policy_version 10354 (0.0011) +[2024-07-05 16:30:43,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42438656. Throughput: 0: 2939.2. Samples: 5609350. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:43,942][04005] Avg episode reward: [(0, '45.812')] +[2024-07-05 16:30:44,669][04594] Updated weights for policy 0, policy_version 10364 (0.0011) +[2024-07-05 16:30:48,158][04594] Updated weights for policy 0, policy_version 10374 (0.0011) +[2024-07-05 16:30:48,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 42500096. Throughput: 0: 2935.1. Samples: 5618192. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:48,942][04005] Avg episode reward: [(0, '46.339')] +[2024-07-05 16:30:51,648][04594] Updated weights for policy 0, policy_version 10384 (0.0011) +[2024-07-05 16:30:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42557440. Throughput: 0: 2936.3. Samples: 5635780. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:53,942][04005] Avg episode reward: [(0, '46.719')] +[2024-07-05 16:30:55,122][04594] Updated weights for policy 0, policy_version 10394 (0.0012) +[2024-07-05 16:30:58,617][04594] Updated weights for policy 0, policy_version 10404 (0.0012) +[2024-07-05 16:30:58,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42614784. Throughput: 0: 2938.6. Samples: 5653536. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:30:58,942][04005] Avg episode reward: [(0, '48.068')] +[2024-07-05 16:31:02,100][04594] Updated weights for policy 0, policy_version 10414 (0.0011) +[2024-07-05 16:31:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 42676224. Throughput: 0: 2937.2. Samples: 5662308. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:03,942][04005] Avg episode reward: [(0, '48.511')] +[2024-07-05 16:31:05,595][04594] Updated weights for policy 0, policy_version 10424 (0.0011) +[2024-07-05 16:31:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11742.0, 300 sec: 11746.5). Total num frames: 42733568. Throughput: 0: 2936.7. Samples: 5679806. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:08,942][04005] Avg episode reward: [(0, '50.677')] +[2024-07-05 16:31:09,086][04594] Updated weights for policy 0, policy_version 10434 (0.0011) +[2024-07-05 16:31:12,572][04594] Updated weights for policy 0, policy_version 10444 (0.0011) +[2024-07-05 16:31:13,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 42790912. Throughput: 0: 2937.0. Samples: 5697552. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:13,943][04005] Avg episode reward: [(0, '51.728')] +[2024-07-05 16:31:16,063][04594] Updated weights for policy 0, policy_version 10454 (0.0012) +[2024-07-05 16:31:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 42852352. Throughput: 0: 2936.8. Samples: 5706334. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:18,943][04005] Avg episode reward: [(0, '50.396')] +[2024-07-05 16:31:19,540][04594] Updated weights for policy 0, policy_version 10464 (0.0011) +[2024-07-05 16:31:23,015][04594] Updated weights for policy 0, policy_version 10474 (0.0011) +[2024-07-05 16:31:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 42909696. Throughput: 0: 2936.2. Samples: 5723844. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:23,943][04005] Avg episode reward: [(0, '49.326')] +[2024-07-05 16:31:26,499][04594] Updated weights for policy 0, policy_version 10484 (0.0013) +[2024-07-05 16:31:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 42971136. Throughput: 0: 2943.6. Samples: 5741810. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:28,942][04005] Avg episode reward: [(0, '48.535')] +[2024-07-05 16:31:29,977][04594] Updated weights for policy 0, policy_version 10494 (0.0011) +[2024-07-05 16:31:33,448][04594] Updated weights for policy 0, policy_version 10504 (0.0011) +[2024-07-05 16:31:33,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 43028480. Throughput: 0: 2937.9. Samples: 5750396. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:33,942][04005] Avg episode reward: [(0, '48.514')] +[2024-07-05 16:31:36,953][04594] Updated weights for policy 0, policy_version 10514 (0.0012) +[2024-07-05 16:31:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43085824. Throughput: 0: 2938.1. Samples: 5767994. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:38,942][04005] Avg episode reward: [(0, '48.782')] +[2024-07-05 16:31:40,447][04594] Updated weights for policy 0, policy_version 10524 (0.0011) +[2024-07-05 16:31:43,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 43143168. Throughput: 0: 2937.7. Samples: 5785734. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:43,943][04005] Avg episode reward: [(0, '48.862')] +[2024-07-05 16:31:43,945][04594] Updated weights for policy 0, policy_version 10534 (0.0012) +[2024-07-05 16:31:47,433][04594] Updated weights for policy 0, policy_version 10544 (0.0011) +[2024-07-05 16:31:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 43204608. Throughput: 0: 2935.7. Samples: 5794414. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:48,942][04005] Avg episode reward: [(0, '47.496')] +[2024-07-05 16:31:50,927][04594] Updated weights for policy 0, policy_version 10554 (0.0012) +[2024-07-05 16:31:53,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 43261952. Throughput: 0: 2936.3. Samples: 5811940. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:53,942][04005] Avg episode reward: [(0, '49.390')] +[2024-07-05 16:31:54,392][04594] Updated weights for policy 0, policy_version 10564 (0.0011) +[2024-07-05 16:31:57,892][04594] Updated weights for policy 0, policy_version 10574 (0.0011) +[2024-07-05 16:31:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 43323392. Throughput: 0: 2938.5. Samples: 5829784. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:31:58,942][04005] Avg episode reward: [(0, '48.598')] +[2024-07-05 16:32:01,374][04594] Updated weights for policy 0, policy_version 10584 (0.0011) +[2024-07-05 16:32:03,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43380736. Throughput: 0: 2936.1. Samples: 5838460. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:03,942][04005] Avg episode reward: [(0, '49.772')] +[2024-07-05 16:32:04,859][04594] Updated weights for policy 0, policy_version 10594 (0.0012) +[2024-07-05 16:32:08,343][04594] Updated weights for policy 0, policy_version 10604 (0.0011) +[2024-07-05 16:32:08,942][04005] Fps is (10 sec: 11467.9, 60 sec: 11741.7, 300 sec: 11746.5). Total num frames: 43438080. Throughput: 0: 2937.3. Samples: 5856024. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:08,945][04005] Avg episode reward: [(0, '49.480')] +[2024-07-05 16:32:11,827][04594] Updated weights for policy 0, policy_version 10614 (0.0013) +[2024-07-05 16:32:13,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 43499520. Throughput: 0: 2935.2. Samples: 5873894. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:13,942][04005] Avg episode reward: [(0, '50.589')] +[2024-07-05 16:32:15,318][04594] Updated weights for policy 0, policy_version 10624 (0.0015) +[2024-07-05 16:32:18,792][04594] Updated weights for policy 0, policy_version 10634 (0.0011) +[2024-07-05 16:32:18,941][04005] Fps is (10 sec: 11879.1, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43556864. Throughput: 0: 2934.9. Samples: 5882466. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:18,942][04005] Avg episode reward: [(0, '49.487')] +[2024-07-05 16:32:22,274][04594] Updated weights for policy 0, policy_version 10644 (0.0011) +[2024-07-05 16:32:23,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43614208. Throughput: 0: 2937.5. Samples: 5900184. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:23,942][04005] Avg episode reward: [(0, '50.762')] +[2024-07-05 16:32:25,765][04594] Updated weights for policy 0, policy_version 10654 (0.0011) +[2024-07-05 16:32:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 43675648. Throughput: 0: 2938.2. Samples: 5917954. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:28,942][04005] Avg episode reward: [(0, '48.951')] +[2024-07-05 16:32:29,259][04594] Updated weights for policy 0, policy_version 10664 (0.0011) +[2024-07-05 16:32:32,753][04594] Updated weights for policy 0, policy_version 10674 (0.0011) +[2024-07-05 16:32:33,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 43732992. Throughput: 0: 2935.0. Samples: 5926490. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:33,943][04005] Avg episode reward: [(0, '50.421')] +[2024-07-05 16:32:34,147][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000010678_43737088.pth... +[2024-07-05 16:32:34,219][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000009989_40914944.pth +[2024-07-05 16:32:36,254][04594] Updated weights for policy 0, policy_version 10684 (0.0013) +[2024-07-05 16:32:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43790336. Throughput: 0: 2935.8. Samples: 5944050. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:38,942][04005] Avg episode reward: [(0, '49.034')] +[2024-07-05 16:32:39,751][04594] Updated weights for policy 0, policy_version 10694 (0.0012) +[2024-07-05 16:32:43,253][04594] Updated weights for policy 0, policy_version 10704 (0.0011) +[2024-07-05 16:32:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 43851776. Throughput: 0: 2935.0. Samples: 5961858. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:43,942][04005] Avg episode reward: [(0, '49.506')] +[2024-07-05 16:32:46,729][04594] Updated weights for policy 0, policy_version 10714 (0.0011) +[2024-07-05 16:32:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43909120. Throughput: 0: 2934.2. Samples: 5970498. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:48,942][04005] Avg episode reward: [(0, '49.308')] +[2024-07-05 16:32:50,224][04594] Updated weights for policy 0, policy_version 10724 (0.0011) +[2024-07-05 16:32:53,695][04594] Updated weights for policy 0, policy_version 10734 (0.0011) +[2024-07-05 16:32:53,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 43966464. Throughput: 0: 2933.6. Samples: 5988034. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:53,942][04005] Avg episode reward: [(0, '49.224')] +[2024-07-05 16:32:57,180][04594] Updated weights for policy 0, policy_version 10744 (0.0011) +[2024-07-05 16:32:58,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 44027904. Throughput: 0: 2934.9. Samples: 6005966. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:32:58,942][04005] Avg episode reward: [(0, '48.291')] +[2024-07-05 16:33:00,677][04594] Updated weights for policy 0, policy_version 10754 (0.0013) +[2024-07-05 16:33:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44085248. Throughput: 0: 2934.6. Samples: 6014522. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:03,943][04005] Avg episode reward: [(0, '47.299')] +[2024-07-05 16:33:04,150][04594] Updated weights for policy 0, policy_version 10764 (0.0011) +[2024-07-05 16:33:07,645][04594] Updated weights for policy 0, policy_version 10774 (0.0012) +[2024-07-05 16:33:08,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11742.0, 300 sec: 11746.5). Total num frames: 44142592. Throughput: 0: 2931.7. Samples: 6032110. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:08,943][04005] Avg episode reward: [(0, '49.080')] +[2024-07-05 16:33:11,130][04594] Updated weights for policy 0, policy_version 10784 (0.0011) +[2024-07-05 16:33:13,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 44204032. Throughput: 0: 2934.8. Samples: 6050018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:13,942][04005] Avg episode reward: [(0, '50.970')] +[2024-07-05 16:33:14,620][04594] Updated weights for policy 0, policy_version 10794 (0.0013) +[2024-07-05 16:33:18,108][04594] Updated weights for policy 0, policy_version 10804 (0.0011) +[2024-07-05 16:33:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44261376. Throughput: 0: 2935.2. Samples: 6058574. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:33:18,942][04005] Avg episode reward: [(0, '52.729')] +[2024-07-05 16:33:21,598][04594] Updated weights for policy 0, policy_version 10814 (0.0011) +[2024-07-05 16:33:23,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44318720. Throughput: 0: 2935.2. Samples: 6076136. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:33:23,943][04005] Avg episode reward: [(0, '50.394')] +[2024-07-05 16:33:25,075][04594] Updated weights for policy 0, policy_version 10824 (0.0013) +[2024-07-05 16:33:28,559][04594] Updated weights for policy 0, policy_version 10834 (0.0012) +[2024-07-05 16:33:28,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 44380160. Throughput: 0: 2937.5. Samples: 6094046. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:28,942][04005] Avg episode reward: [(0, '49.018')] +[2024-07-05 16:33:32,047][04594] Updated weights for policy 0, policy_version 10844 (0.0011) +[2024-07-05 16:33:33,941][04005] Fps is (10 sec: 11878.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44437504. Throughput: 0: 2935.4. Samples: 6102592. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:33,942][04005] Avg episode reward: [(0, '47.904')] +[2024-07-05 16:33:35,538][04594] Updated weights for policy 0, policy_version 10854 (0.0011) +[2024-07-05 16:33:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44494848. Throughput: 0: 2937.5. Samples: 6120220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:38,942][04005] Avg episode reward: [(0, '48.791')] +[2024-07-05 16:33:39,028][04594] Updated weights for policy 0, policy_version 10864 (0.0011) +[2024-07-05 16:33:42,519][04594] Updated weights for policy 0, policy_version 10874 (0.0011) +[2024-07-05 16:33:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44556288. Throughput: 0: 2936.1. Samples: 6138090. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:43,942][04005] Avg episode reward: [(0, '48.344')] +[2024-07-05 16:33:46,007][04594] Updated weights for policy 0, policy_version 10884 (0.0014) +[2024-07-05 16:33:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44613632. Throughput: 0: 2935.7. Samples: 6146628. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 16:33:48,942][04005] Avg episode reward: [(0, '48.669')] +[2024-07-05 16:33:49,493][04594] Updated weights for policy 0, policy_version 10894 (0.0011) +[2024-07-05 16:33:52,986][04594] Updated weights for policy 0, policy_version 10904 (0.0012) +[2024-07-05 16:33:53,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44670976. Throughput: 0: 2937.6. Samples: 6164302. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:33:53,942][04005] Avg episode reward: [(0, '48.497')] +[2024-07-05 16:33:56,460][04594] Updated weights for policy 0, policy_version 10914 (0.0011) +[2024-07-05 16:33:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44732416. Throughput: 0: 2936.8. Samples: 6182176. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:33:58,942][04005] Avg episode reward: [(0, '50.216')] +[2024-07-05 16:33:59,949][04594] Updated weights for policy 0, policy_version 10924 (0.0011) +[2024-07-05 16:34:03,422][04594] Updated weights for policy 0, policy_version 10934 (0.0011) +[2024-07-05 16:34:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44789760. Throughput: 0: 2936.9. Samples: 6190736. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:03,943][04005] Avg episode reward: [(0, '51.098')] +[2024-07-05 16:34:06,902][04594] Updated weights for policy 0, policy_version 10944 (0.0013) +[2024-07-05 16:34:08,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44847104. Throughput: 0: 2942.8. Samples: 6208560. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:08,943][04005] Avg episode reward: [(0, '51.417')] +[2024-07-05 16:34:10,407][04594] Updated weights for policy 0, policy_version 10954 (0.0011) +[2024-07-05 16:34:13,884][04594] Updated weights for policy 0, policy_version 10964 (0.0011) +[2024-07-05 16:34:13,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 44908544. Throughput: 0: 2937.2. Samples: 6226218. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:13,942][04005] Avg episode reward: [(0, '52.335')] +[2024-07-05 16:34:17,376][04594] Updated weights for policy 0, policy_version 10974 (0.0011) +[2024-07-05 16:34:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 44965888. Throughput: 0: 2936.8. Samples: 6234748. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:18,942][04005] Avg episode reward: [(0, '50.808')] +[2024-07-05 16:34:20,873][04594] Updated weights for policy 0, policy_version 10984 (0.0013) +[2024-07-05 16:34:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45023232. Throughput: 0: 2939.2. Samples: 6252486. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:23,943][04005] Avg episode reward: [(0, '50.446')] +[2024-07-05 16:34:24,349][04594] Updated weights for policy 0, policy_version 10994 (0.0011) +[2024-07-05 16:34:27,840][04594] Updated weights for policy 0, policy_version 11004 (0.0011) +[2024-07-05 16:34:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45084672. Throughput: 0: 2937.2. Samples: 6270262. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:28,942][04005] Avg episode reward: [(0, '49.619')] +[2024-07-05 16:34:31,314][04594] Updated weights for policy 0, policy_version 11014 (0.0012) +[2024-07-05 16:34:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 45142016. Throughput: 0: 2937.7. Samples: 6278824. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:33,942][04005] Avg episode reward: [(0, '49.265')] +[2024-07-05 16:34:34,095][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000011022_45146112.pth... +[2024-07-05 16:34:34,167][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000010334_42328064.pth +[2024-07-05 16:34:34,809][04594] Updated weights for policy 0, policy_version 11024 (0.0011) +[2024-07-05 16:34:38,278][04594] Updated weights for policy 0, policy_version 11034 (0.0011) +[2024-07-05 16:34:38,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45199360. Throughput: 0: 2942.9. Samples: 6296734. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:38,942][04005] Avg episode reward: [(0, '49.671')] +[2024-07-05 16:34:41,763][04594] Updated weights for policy 0, policy_version 11044 (0.0011) +[2024-07-05 16:34:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45260800. Throughput: 0: 2936.7. Samples: 6314326. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:43,942][04005] Avg episode reward: [(0, '49.837')] +[2024-07-05 16:34:45,267][04594] Updated weights for policy 0, policy_version 11054 (0.0012) +[2024-07-05 16:34:48,738][04594] Updated weights for policy 0, policy_version 11064 (0.0011) +[2024-07-05 16:34:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45318144. Throughput: 0: 2936.8. Samples: 6322892. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:48,942][04005] Avg episode reward: [(0, '49.859')] +[2024-07-05 16:34:52,231][04594] Updated weights for policy 0, policy_version 11074 (0.0011) +[2024-07-05 16:34:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45375488. Throughput: 0: 2939.2. Samples: 6340824. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:53,942][04005] Avg episode reward: [(0, '49.523')] +[2024-07-05 16:34:55,720][04594] Updated weights for policy 0, policy_version 11084 (0.0011) +[2024-07-05 16:34:58,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45436928. Throughput: 0: 2937.4. Samples: 6358400. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:34:58,942][04005] Avg episode reward: [(0, '49.690')] +[2024-07-05 16:34:59,196][04594] Updated weights for policy 0, policy_version 11094 (0.0012) +[2024-07-05 16:35:02,684][04594] Updated weights for policy 0, policy_version 11104 (0.0013) +[2024-07-05 16:35:03,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45494272. Throughput: 0: 2940.7. Samples: 6367082. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:35:03,943][04005] Avg episode reward: [(0, '50.336')] +[2024-07-05 16:35:06,168][04594] Updated weights for policy 0, policy_version 11114 (0.0011) +[2024-07-05 16:35:08,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45551616. Throughput: 0: 2943.6. Samples: 6384948. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-07-05 16:35:08,942][04005] Avg episode reward: [(0, '51.464')] +[2024-07-05 16:35:09,670][04594] Updated weights for policy 0, policy_version 11124 (0.0012) +[2024-07-05 16:35:13,145][04594] Updated weights for policy 0, policy_version 11134 (0.0011) +[2024-07-05 16:35:13,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45613056. Throughput: 0: 2938.3. Samples: 6402488. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:13,942][04005] Avg episode reward: [(0, '49.237')] +[2024-07-05 16:35:16,635][04594] Updated weights for policy 0, policy_version 11144 (0.0011) +[2024-07-05 16:35:18,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45670400. Throughput: 0: 2940.8. Samples: 6411160. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:18,942][04005] Avg episode reward: [(0, '51.199')] +[2024-07-05 16:35:20,106][04594] Updated weights for policy 0, policy_version 11154 (0.0011) +[2024-07-05 16:35:23,599][04594] Updated weights for policy 0, policy_version 11164 (0.0013) +[2024-07-05 16:35:23,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45727744. Throughput: 0: 2939.8. Samples: 6429024. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:23,942][04005] Avg episode reward: [(0, '49.111')] +[2024-07-05 16:35:27,078][04594] Updated weights for policy 0, policy_version 11174 (0.0012) +[2024-07-05 16:35:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45789184. Throughput: 0: 2937.9. Samples: 6446532. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:28,942][04005] Avg episode reward: [(0, '48.303')] +[2024-07-05 16:35:30,575][04594] Updated weights for policy 0, policy_version 11184 (0.0012) +[2024-07-05 16:35:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45846528. Throughput: 0: 2941.6. Samples: 6455262. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:35:33,942][04005] Avg episode reward: [(0, '47.076')] +[2024-07-05 16:35:34,062][04594] Updated weights for policy 0, policy_version 11194 (0.0011) +[2024-07-05 16:35:37,543][04594] Updated weights for policy 0, policy_version 11204 (0.0011) +[2024-07-05 16:35:38,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 45907968. Throughput: 0: 2938.8. Samples: 6473072. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:38,943][04005] Avg episode reward: [(0, '48.867')] +[2024-07-05 16:35:41,035][04594] Updated weights for policy 0, policy_version 11214 (0.0011) +[2024-07-05 16:35:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 45965312. Throughput: 0: 2938.0. Samples: 6490608. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:43,942][04005] Avg episode reward: [(0, '51.048')] +[2024-07-05 16:35:44,508][04594] Updated weights for policy 0, policy_version 11224 (0.0011) +[2024-07-05 16:35:47,994][04594] Updated weights for policy 0, policy_version 11234 (0.0011) +[2024-07-05 16:35:48,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46022656. Throughput: 0: 2941.4. Samples: 6499446. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:48,943][04005] Avg episode reward: [(0, '52.012')] +[2024-07-05 16:35:51,476][04594] Updated weights for policy 0, policy_version 11244 (0.0011) +[2024-07-05 16:35:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 46084096. Throughput: 0: 2937.7. Samples: 6517146. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 16:35:53,942][04005] Avg episode reward: [(0, '53.305')] +[2024-07-05 16:35:53,945][04581] Saving new best policy, reward=53.305! +[2024-07-05 16:35:54,972][04594] Updated weights for policy 0, policy_version 11254 (0.0012) +[2024-07-05 16:35:58,457][04594] Updated weights for policy 0, policy_version 11264 (0.0011) +[2024-07-05 16:35:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46141440. Throughput: 0: 2938.6. Samples: 6534724. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:35:58,942][04005] Avg episode reward: [(0, '52.576')] +[2024-07-05 16:36:01,937][04594] Updated weights for policy 0, policy_version 11274 (0.0011) +[2024-07-05 16:36:03,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46198784. Throughput: 0: 2943.1. Samples: 6543600. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:03,943][04005] Avg episode reward: [(0, '50.455')] +[2024-07-05 16:36:05,413][04594] Updated weights for policy 0, policy_version 11284 (0.0011) +[2024-07-05 16:36:08,908][04594] Updated weights for policy 0, policy_version 11294 (0.0011) +[2024-07-05 16:36:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 46260224. Throughput: 0: 2938.1. Samples: 6561236. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:08,942][04005] Avg episode reward: [(0, '49.859')] +[2024-07-05 16:36:12,397][04594] Updated weights for policy 0, policy_version 11304 (0.0013) +[2024-07-05 16:36:13,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46317568. Throughput: 0: 2937.8. Samples: 6578734. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:13,942][04005] Avg episode reward: [(0, '49.633')] +[2024-07-05 16:36:15,881][04594] Updated weights for policy 0, policy_version 11314 (0.0012) +[2024-07-05 16:36:18,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46374912. Throughput: 0: 2942.2. Samples: 6587662. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:18,942][04005] Avg episode reward: [(0, '51.385')] +[2024-07-05 16:36:19,366][04594] Updated weights for policy 0, policy_version 11324 (0.0011) +[2024-07-05 16:36:22,852][04594] Updated weights for policy 0, policy_version 11334 (0.0011) +[2024-07-05 16:36:23,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11746.5). Total num frames: 46436352. Throughput: 0: 2937.5. Samples: 6605258. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:23,942][04005] Avg episode reward: [(0, '50.288')] +[2024-07-05 16:36:26,332][04594] Updated weights for policy 0, policy_version 11344 (0.0012) +[2024-07-05 16:36:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46493696. Throughput: 0: 2938.5. Samples: 6622842. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:28,942][04005] Avg episode reward: [(0, '48.083')] +[2024-07-05 16:36:29,810][04594] Updated weights for policy 0, policy_version 11354 (0.0011) +[2024-07-05 16:36:33,298][04594] Updated weights for policy 0, policy_version 11364 (0.0011) +[2024-07-05 16:36:33,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46551040. Throughput: 0: 2942.0. Samples: 6631834. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:33,942][04005] Avg episode reward: [(0, '48.888')] +[2024-07-05 16:36:33,987][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000011366_46555136.pth... +[2024-07-05 16:36:34,058][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000010678_43737088.pth +[2024-07-05 16:36:36,785][04594] Updated weights for policy 0, policy_version 11374 (0.0011) +[2024-07-05 16:36:38,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 46612480. Throughput: 0: 2938.7. Samples: 6649386. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:38,942][04005] Avg episode reward: [(0, '49.482')] +[2024-07-05 16:36:40,294][04594] Updated weights for policy 0, policy_version 11384 (0.0012) +[2024-07-05 16:36:43,785][04594] Updated weights for policy 0, policy_version 11394 (0.0011) +[2024-07-05 16:36:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46669824. Throughput: 0: 2936.4. Samples: 6666864. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:43,942][04005] Avg episode reward: [(0, '51.452')] +[2024-07-05 16:36:47,279][04594] Updated weights for policy 0, policy_version 11404 (0.0012) +[2024-07-05 16:36:48,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46727168. Throughput: 0: 2937.2. Samples: 6675774. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:48,942][04005] Avg episode reward: [(0, '48.515')] +[2024-07-05 16:36:50,750][04594] Updated weights for policy 0, policy_version 11414 (0.0011) +[2024-07-05 16:36:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46788608. Throughput: 0: 2936.2. Samples: 6693364. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:53,942][04005] Avg episode reward: [(0, '46.742')] +[2024-07-05 16:36:54,248][04594] Updated weights for policy 0, policy_version 11424 (0.0011) +[2024-07-05 16:36:57,722][04594] Updated weights for policy 0, policy_version 11434 (0.0012) +[2024-07-05 16:36:58,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46845952. Throughput: 0: 2938.0. Samples: 6710942. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:36:58,942][04005] Avg episode reward: [(0, '46.073')] +[2024-07-05 16:37:01,216][04594] Updated weights for policy 0, policy_version 11444 (0.0014) +[2024-07-05 16:37:03,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46903296. Throughput: 0: 2938.3. Samples: 6719884. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:03,942][04005] Avg episode reward: [(0, '47.813')] +[2024-07-05 16:37:04,705][04594] Updated weights for policy 0, policy_version 11454 (0.0011) +[2024-07-05 16:37:08,183][04594] Updated weights for policy 0, policy_version 11464 (0.0011) +[2024-07-05 16:37:08,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 46964736. Throughput: 0: 2937.0. Samples: 6737424. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:08,942][04005] Avg episode reward: [(0, '49.559')] +[2024-07-05 16:37:11,674][04594] Updated weights for policy 0, policy_version 11474 (0.0011) +[2024-07-05 16:37:13,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47022080. Throughput: 0: 2935.8. Samples: 6754954. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:13,943][04005] Avg episode reward: [(0, '50.344')] +[2024-07-05 16:37:15,167][04594] Updated weights for policy 0, policy_version 11484 (0.0011) +[2024-07-05 16:37:18,638][04594] Updated weights for policy 0, policy_version 11494 (0.0011) +[2024-07-05 16:37:18,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47079424. Throughput: 0: 2936.0. Samples: 6763952. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:18,942][04005] Avg episode reward: [(0, '50.588')] +[2024-07-05 16:37:22,121][04594] Updated weights for policy 0, policy_version 11504 (0.0012) +[2024-07-05 16:37:23,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47140864. Throughput: 0: 2935.7. Samples: 6781490. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:23,942][04005] Avg episode reward: [(0, '51.437')] +[2024-07-05 16:37:25,612][04594] Updated weights for policy 0, policy_version 11514 (0.0011) +[2024-07-05 16:37:28,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 47198208. Throughput: 0: 2937.4. Samples: 6799048. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:28,943][04005] Avg episode reward: [(0, '50.962')] +[2024-07-05 16:37:29,094][04594] Updated weights for policy 0, policy_version 11524 (0.0011) +[2024-07-05 16:37:32,583][04594] Updated weights for policy 0, policy_version 11534 (0.0011) +[2024-07-05 16:37:33,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47255552. Throughput: 0: 2938.1. Samples: 6807990. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:33,942][04005] Avg episode reward: [(0, '50.523')] +[2024-07-05 16:37:36,074][04594] Updated weights for policy 0, policy_version 11544 (0.0011) +[2024-07-05 16:37:38,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47316992. Throughput: 0: 2937.2. Samples: 6825536. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:38,942][04005] Avg episode reward: [(0, '49.497')] +[2024-07-05 16:37:39,571][04594] Updated weights for policy 0, policy_version 11554 (0.0011) +[2024-07-05 16:37:43,054][04594] Updated weights for policy 0, policy_version 11564 (0.0011) +[2024-07-05 16:37:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47374336. Throughput: 0: 2936.3. Samples: 6843074. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:43,942][04005] Avg episode reward: [(0, '49.931')] +[2024-07-05 16:37:46,546][04594] Updated weights for policy 0, policy_version 11574 (0.0012) +[2024-07-05 16:37:48,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 47431680. Throughput: 0: 2937.6. Samples: 6852076. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:37:48,943][04005] Avg episode reward: [(0, '49.983')] +[2024-07-05 16:37:50,029][04594] Updated weights for policy 0, policy_version 11584 (0.0011) +[2024-07-05 16:37:53,505][04594] Updated weights for policy 0, policy_version 11594 (0.0011) +[2024-07-05 16:37:53,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47493120. Throughput: 0: 2938.0. Samples: 6869636. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:37:53,942][04005] Avg episode reward: [(0, '49.103')] +[2024-07-05 16:37:56,982][04594] Updated weights for policy 0, policy_version 11604 (0.0011) +[2024-07-05 16:37:58,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47550464. Throughput: 0: 2937.7. Samples: 6887150. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:37:58,942][04005] Avg episode reward: [(0, '49.347')] +[2024-07-05 16:38:00,459][04594] Updated weights for policy 0, policy_version 11614 (0.0011) +[2024-07-05 16:38:03,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47607808. Throughput: 0: 2937.7. Samples: 6896146. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:03,942][04005] Avg episode reward: [(0, '48.356')] +[2024-07-05 16:38:03,962][04594] Updated weights for policy 0, policy_version 11624 (0.0011) +[2024-07-05 16:38:07,438][04594] Updated weights for policy 0, policy_version 11634 (0.0011) +[2024-07-05 16:38:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47669248. Throughput: 0: 2938.0. Samples: 6913702. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:08,942][04005] Avg episode reward: [(0, '48.807')] +[2024-07-05 16:38:10,929][04594] Updated weights for policy 0, policy_version 11644 (0.0011) +[2024-07-05 16:38:13,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47726592. Throughput: 0: 2937.8. Samples: 6931250. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:13,942][04005] Avg episode reward: [(0, '48.808')] +[2024-07-05 16:38:14,402][04594] Updated weights for policy 0, policy_version 11654 (0.0011) +[2024-07-05 16:38:17,899][04594] Updated weights for policy 0, policy_version 11664 (0.0011) +[2024-07-05 16:38:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 47788032. Throughput: 0: 2938.0. Samples: 6940198. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:18,942][04005] Avg episode reward: [(0, '48.579')] +[2024-07-05 16:38:21,385][04594] Updated weights for policy 0, policy_version 11674 (0.0011) +[2024-07-05 16:38:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 47845376. Throughput: 0: 2938.3. Samples: 6957760. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:23,942][04005] Avg episode reward: [(0, '47.296')] +[2024-07-05 16:38:24,852][04594] Updated weights for policy 0, policy_version 11684 (0.0011) +[2024-07-05 16:38:28,330][04594] Updated weights for policy 0, policy_version 11694 (0.0012) +[2024-07-05 16:38:28,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 47902720. Throughput: 0: 2937.8. Samples: 6975276. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:28,942][04005] Avg episode reward: [(0, '48.753')] +[2024-07-05 16:38:31,819][04594] Updated weights for policy 0, policy_version 11704 (0.0011) +[2024-07-05 16:38:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 47964160. Throughput: 0: 2938.1. Samples: 6984290. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:38:33,942][04005] Avg episode reward: [(0, '50.810')] +[2024-07-05 16:38:33,945][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000011710_47964160.pth... +[2024-07-05 16:38:34,027][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000011022_45146112.pth +[2024-07-05 16:38:35,323][04594] Updated weights for policy 0, policy_version 11714 (0.0011) +[2024-07-05 16:38:38,802][04594] Updated weights for policy 0, policy_version 11724 (0.0011) +[2024-07-05 16:38:38,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48021504. Throughput: 0: 2937.0. Samples: 7001800. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:38:38,942][04005] Avg episode reward: [(0, '50.553')] +[2024-07-05 16:38:42,315][04594] Updated weights for policy 0, policy_version 11734 (0.0011) +[2024-07-05 16:38:43,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48078848. Throughput: 0: 2937.2. Samples: 7019326. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:43,944][04005] Avg episode reward: [(0, '51.312')] +[2024-07-05 16:38:45,792][04594] Updated weights for policy 0, policy_version 11744 (0.0011) +[2024-07-05 16:38:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 48140288. Throughput: 0: 2936.6. Samples: 7028294. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:48,942][04005] Avg episode reward: [(0, '48.755')] +[2024-07-05 16:38:49,281][04594] Updated weights for policy 0, policy_version 11754 (0.0014) +[2024-07-05 16:38:52,747][04594] Updated weights for policy 0, policy_version 11764 (0.0012) +[2024-07-05 16:38:53,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48197632. Throughput: 0: 2936.8. Samples: 7045860. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:53,942][04005] Avg episode reward: [(0, '50.276')] +[2024-07-05 16:38:56,236][04594] Updated weights for policy 0, policy_version 11774 (0.0011) +[2024-07-05 16:38:58,941][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48254976. Throughput: 0: 2937.2. Samples: 7063424. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:38:58,942][04005] Avg episode reward: [(0, '49.892')] +[2024-07-05 16:38:59,715][04594] Updated weights for policy 0, policy_version 11784 (0.0011) +[2024-07-05 16:39:03,200][04594] Updated weights for policy 0, policy_version 11794 (0.0012) +[2024-07-05 16:39:03,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 48316416. Throughput: 0: 2937.9. Samples: 7072404. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:03,942][04005] Avg episode reward: [(0, '52.650')] +[2024-07-05 16:39:06,681][04594] Updated weights for policy 0, policy_version 11804 (0.0012) +[2024-07-05 16:39:08,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48373760. Throughput: 0: 2937.7. Samples: 7089958. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:08,942][04005] Avg episode reward: [(0, '52.714')] +[2024-07-05 16:39:10,166][04594] Updated weights for policy 0, policy_version 11814 (0.0011) +[2024-07-05 16:39:13,646][04594] Updated weights for policy 0, policy_version 11824 (0.0013) +[2024-07-05 16:39:13,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48431104. Throughput: 0: 2938.7. Samples: 7107516. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:13,942][04005] Avg episode reward: [(0, '51.811')] +[2024-07-05 16:39:17,137][04594] Updated weights for policy 0, policy_version 11834 (0.0012) +[2024-07-05 16:39:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 48492544. Throughput: 0: 2937.4. Samples: 7116474. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:18,942][04005] Avg episode reward: [(0, '50.847')] +[2024-07-05 16:39:20,625][04594] Updated weights for policy 0, policy_version 11844 (0.0012) +[2024-07-05 16:39:23,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48549888. Throughput: 0: 2937.2. Samples: 7133976. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:23,942][04005] Avg episode reward: [(0, '50.308')] +[2024-07-05 16:39:24,101][04594] Updated weights for policy 0, policy_version 11854 (0.0011) +[2024-07-05 16:39:27,595][04594] Updated weights for policy 0, policy_version 11864 (0.0011) +[2024-07-05 16:39:28,942][04005] Fps is (10 sec: 11468.2, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 48607232. Throughput: 0: 2939.6. Samples: 7151608. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:39:28,944][04005] Avg episode reward: [(0, '51.111')] +[2024-07-05 16:39:31,071][04594] Updated weights for policy 0, policy_version 11874 (0.0011) +[2024-07-05 16:39:33,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 48668672. Throughput: 0: 2938.7. Samples: 7160536. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:39:33,943][04005] Avg episode reward: [(0, '51.793')] +[2024-07-05 16:39:34,560][04594] Updated weights for policy 0, policy_version 11884 (0.0011) +[2024-07-05 16:39:38,037][04594] Updated weights for policy 0, policy_version 11894 (0.0011) +[2024-07-05 16:39:38,942][04005] Fps is (10 sec: 11878.8, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 48726016. Throughput: 0: 2938.7. Samples: 7178102. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:38,943][04005] Avg episode reward: [(0, '50.672')] +[2024-07-05 16:39:41,523][04594] Updated weights for policy 0, policy_version 11904 (0.0011) +[2024-07-05 16:39:43,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 48783360. Throughput: 0: 2941.6. Samples: 7195798. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:43,942][04005] Avg episode reward: [(0, '48.781')] +[2024-07-05 16:39:45,016][04594] Updated weights for policy 0, policy_version 11914 (0.0011) +[2024-07-05 16:39:48,493][04594] Updated weights for policy 0, policy_version 11924 (0.0011) +[2024-07-05 16:39:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 48844800. Throughput: 0: 2937.9. Samples: 7204608. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:48,943][04005] Avg episode reward: [(0, '49.741')] +[2024-07-05 16:39:51,966][04594] Updated weights for policy 0, policy_version 11934 (0.0011) +[2024-07-05 16:39:53,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 48902144. Throughput: 0: 2938.5. Samples: 7222190. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:53,943][04005] Avg episode reward: [(0, '51.164')] +[2024-07-05 16:39:55,446][04594] Updated weights for policy 0, policy_version 11944 (0.0011) +[2024-07-05 16:39:58,940][04594] Updated weights for policy 0, policy_version 11954 (0.0011) +[2024-07-05 16:39:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 48963584. Throughput: 0: 2944.5. Samples: 7240018. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:39:58,942][04005] Avg episode reward: [(0, '51.874')] +[2024-07-05 16:40:02,427][04594] Updated weights for policy 0, policy_version 11964 (0.0011) +[2024-07-05 16:40:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 49020928. Throughput: 0: 2938.0. Samples: 7248684. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:03,942][04005] Avg episode reward: [(0, '52.097')] +[2024-07-05 16:40:05,914][04594] Updated weights for policy 0, policy_version 11974 (0.0012) +[2024-07-05 16:40:08,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49078272. Throughput: 0: 2938.2. Samples: 7266196. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:08,943][04005] Avg episode reward: [(0, '50.524')] +[2024-07-05 16:40:09,451][04594] Updated weights for policy 0, policy_version 11984 (0.0011) +[2024-07-05 16:40:12,888][04594] Updated weights for policy 0, policy_version 11994 (0.0013) +[2024-07-05 16:40:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 49139712. Throughput: 0: 2943.6. Samples: 7284070. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:13,942][04005] Avg episode reward: [(0, '49.308')] +[2024-07-05 16:40:16,365][04594] Updated weights for policy 0, policy_version 12004 (0.0011) +[2024-07-05 16:40:18,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 49197056. Throughput: 0: 2937.3. Samples: 7292712. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:18,942][04005] Avg episode reward: [(0, '48.621')] +[2024-07-05 16:40:19,846][04594] Updated weights for policy 0, policy_version 12014 (0.0011) +[2024-07-05 16:40:23,331][04594] Updated weights for policy 0, policy_version 12024 (0.0011) +[2024-07-05 16:40:23,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49254400. Throughput: 0: 2939.2. Samples: 7310368. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:23,942][04005] Avg episode reward: [(0, '48.919')] +[2024-07-05 16:40:26,803][04594] Updated weights for policy 0, policy_version 12034 (0.0011) +[2024-07-05 16:40:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 49315840. Throughput: 0: 2943.2. Samples: 7328240. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:28,942][04005] Avg episode reward: [(0, '51.087')] +[2024-07-05 16:40:30,297][04594] Updated weights for policy 0, policy_version 12044 (0.0011) +[2024-07-05 16:40:33,780][04594] Updated weights for policy 0, policy_version 12054 (0.0011) +[2024-07-05 16:40:33,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49373184. Throughput: 0: 2937.1. Samples: 7336778. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:33,942][04005] Avg episode reward: [(0, '52.462')] +[2024-07-05 16:40:34,123][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000012055_49377280.pth... +[2024-07-05 16:40:34,197][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000011366_46555136.pth +[2024-07-05 16:40:37,284][04594] Updated weights for policy 0, policy_version 12064 (0.0011) +[2024-07-05 16:40:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49430528. Throughput: 0: 2936.9. Samples: 7354352. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:40:38,942][04005] Avg episode reward: [(0, '53.781')] +[2024-07-05 16:40:39,019][04581] Saving new best policy, reward=53.781! +[2024-07-05 16:40:40,792][04594] Updated weights for policy 0, policy_version 12074 (0.0012) +[2024-07-05 16:40:43,941][04005] Fps is (10 sec: 11878.7, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 49491968. Throughput: 0: 2936.0. Samples: 7372138. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:40:43,942][04005] Avg episode reward: [(0, '52.510')] +[2024-07-05 16:40:44,276][04594] Updated weights for policy 0, policy_version 12084 (0.0011) +[2024-07-05 16:40:47,758][04594] Updated weights for policy 0, policy_version 12094 (0.0011) +[2024-07-05 16:40:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49549312. Throughput: 0: 2934.1. Samples: 7380718. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:40:48,942][04005] Avg episode reward: [(0, '51.087')] +[2024-07-05 16:40:51,234][04594] Updated weights for policy 0, policy_version 12104 (0.0011) +[2024-07-05 16:40:53,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49606656. Throughput: 0: 2938.1. Samples: 7398412. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:40:53,943][04005] Avg episode reward: [(0, '49.817')] +[2024-07-05 16:40:54,711][04594] Updated weights for policy 0, policy_version 12114 (0.0011) +[2024-07-05 16:40:58,202][04594] Updated weights for policy 0, policy_version 12124 (0.0013) +[2024-07-05 16:40:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 49668096. Throughput: 0: 2936.5. Samples: 7416212. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:40:58,942][04005] Avg episode reward: [(0, '49.742')] +[2024-07-05 16:41:01,684][04594] Updated weights for policy 0, policy_version 12134 (0.0012) +[2024-07-05 16:41:03,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49725440. Throughput: 0: 2934.8. Samples: 7424780. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:03,942][04005] Avg episode reward: [(0, '49.113')] +[2024-07-05 16:41:05,167][04594] Updated weights for policy 0, policy_version 12144 (0.0011) +[2024-07-05 16:41:08,646][04594] Updated weights for policy 0, policy_version 12154 (0.0011) +[2024-07-05 16:41:08,942][04005] Fps is (10 sec: 11468.1, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 49782784. Throughput: 0: 2937.1. Samples: 7442540. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:08,943][04005] Avg episode reward: [(0, '48.409')] +[2024-07-05 16:41:12,139][04594] Updated weights for policy 0, policy_version 12164 (0.0014) +[2024-07-05 16:41:13,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 49844224. Throughput: 0: 2934.0. Samples: 7460268. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:13,942][04005] Avg episode reward: [(0, '50.590')] +[2024-07-05 16:41:15,633][04594] Updated weights for policy 0, policy_version 12174 (0.0012) +[2024-07-05 16:41:18,941][04005] Fps is (10 sec: 11879.1, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49901568. Throughput: 0: 2934.4. Samples: 7468826. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:18,942][04005] Avg episode reward: [(0, '51.798')] +[2024-07-05 16:41:19,121][04594] Updated weights for policy 0, policy_version 12184 (0.0011) +[2024-07-05 16:41:22,607][04594] Updated weights for policy 0, policy_version 12194 (0.0013) +[2024-07-05 16:41:23,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 49958912. Throughput: 0: 2938.3. Samples: 7486576. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:23,942][04005] Avg episode reward: [(0, '52.953')] +[2024-07-05 16:41:26,090][04594] Updated weights for policy 0, policy_version 12204 (0.0011) +[2024-07-05 16:41:28,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 50020352. Throughput: 0: 2938.1. Samples: 7504354. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:28,942][04005] Avg episode reward: [(0, '51.217')] +[2024-07-05 16:41:29,580][04594] Updated weights for policy 0, policy_version 12214 (0.0011) +[2024-07-05 16:41:33,060][04594] Updated weights for policy 0, policy_version 12224 (0.0011) +[2024-07-05 16:41:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50077696. Throughput: 0: 2937.9. Samples: 7512922. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:33,943][04005] Avg episode reward: [(0, '49.002')] +[2024-07-05 16:41:36,556][04594] Updated weights for policy 0, policy_version 12234 (0.0011) +[2024-07-05 16:41:38,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 50135040. Throughput: 0: 2940.3. Samples: 7530724. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:38,942][04005] Avg episode reward: [(0, '48.517')] +[2024-07-05 16:41:40,052][04594] Updated weights for policy 0, policy_version 12244 (0.0011) +[2024-07-05 16:41:43,535][04594] Updated weights for policy 0, policy_version 12254 (0.0013) +[2024-07-05 16:41:43,941][04005] Fps is (10 sec: 11878.6, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 50196480. Throughput: 0: 2936.9. Samples: 7548374. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:43,942][04005] Avg episode reward: [(0, '49.504')] +[2024-07-05 16:41:47,024][04594] Updated weights for policy 0, policy_version 12264 (0.0012) +[2024-07-05 16:41:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50253824. Throughput: 0: 2936.8. Samples: 7556934. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:48,942][04005] Avg episode reward: [(0, '49.769')] +[2024-07-05 16:41:50,506][04594] Updated weights for policy 0, policy_version 12274 (0.0011) +[2024-07-05 16:41:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50311168. Throughput: 0: 2939.9. Samples: 7574832. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 16:41:53,942][04005] Avg episode reward: [(0, '51.417')] +[2024-07-05 16:41:53,982][04594] Updated weights for policy 0, policy_version 12284 (0.0012) +[2024-07-05 16:41:57,483][04594] Updated weights for policy 0, policy_version 12294 (0.0011) +[2024-07-05 16:41:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 50372608. Throughput: 0: 2937.9. Samples: 7592472. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:41:58,942][04005] Avg episode reward: [(0, '51.745')] +[2024-07-05 16:42:00,958][04594] Updated weights for policy 0, policy_version 12304 (0.0011) +[2024-07-05 16:42:03,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50429952. Throughput: 0: 2938.3. Samples: 7601048. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:03,943][04005] Avg episode reward: [(0, '52.256')] +[2024-07-05 16:42:04,431][04594] Updated weights for policy 0, policy_version 12314 (0.0011) +[2024-07-05 16:42:07,935][04594] Updated weights for policy 0, policy_version 12324 (0.0012) +[2024-07-05 16:42:08,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11742.0, 300 sec: 11746.5). Total num frames: 50487296. Throughput: 0: 2939.9. Samples: 7618870. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:08,942][04005] Avg episode reward: [(0, '51.085')] +[2024-07-05 16:42:11,428][04594] Updated weights for policy 0, policy_version 12334 (0.0011) +[2024-07-05 16:42:13,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 50548736. Throughput: 0: 2936.4. Samples: 7636494. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:13,942][04005] Avg episode reward: [(0, '50.318')] +[2024-07-05 16:42:14,925][04594] Updated weights for policy 0, policy_version 12344 (0.0012) +[2024-07-05 16:42:18,413][04594] Updated weights for policy 0, policy_version 12354 (0.0012) +[2024-07-05 16:42:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 50606080. Throughput: 0: 2935.3. Samples: 7645010. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:18,942][04005] Avg episode reward: [(0, '51.215')] +[2024-07-05 16:42:21,907][04594] Updated weights for policy 0, policy_version 12364 (0.0013) +[2024-07-05 16:42:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50663424. Throughput: 0: 2935.8. Samples: 7662834. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:23,942][04005] Avg episode reward: [(0, '51.016')] +[2024-07-05 16:42:25,385][04594] Updated weights for policy 0, policy_version 12374 (0.0013) +[2024-07-05 16:42:28,871][04594] Updated weights for policy 0, policy_version 12384 (0.0011) +[2024-07-05 16:42:28,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 50724864. Throughput: 0: 2936.8. Samples: 7680530. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:28,942][04005] Avg episode reward: [(0, '50.200')] +[2024-07-05 16:42:32,358][04594] Updated weights for policy 0, policy_version 12394 (0.0011) +[2024-07-05 16:42:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50782208. Throughput: 0: 2936.3. Samples: 7689068. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:33,942][04005] Avg episode reward: [(0, '49.145')] +[2024-07-05 16:42:34,094][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000012399_50786304.pth... +[2024-07-05 16:42:34,166][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000011710_47964160.pth +[2024-07-05 16:42:35,861][04594] Updated weights for policy 0, policy_version 12404 (0.0013) +[2024-07-05 16:42:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 50839552. Throughput: 0: 2935.4. Samples: 7706924. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:38,942][04005] Avg episode reward: [(0, '49.475')] +[2024-07-05 16:42:39,334][04594] Updated weights for policy 0, policy_version 12414 (0.0012) +[2024-07-05 16:42:42,824][04594] Updated weights for policy 0, policy_version 12424 (0.0011) +[2024-07-05 16:42:43,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 50900992. Throughput: 0: 2936.8. Samples: 7724626. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:43,942][04005] Avg episode reward: [(0, '50.700')] +[2024-07-05 16:42:46,300][04594] Updated weights for policy 0, policy_version 12434 (0.0011) +[2024-07-05 16:42:48,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 50958336. Throughput: 0: 2936.9. Samples: 7733208. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:48,943][04005] Avg episode reward: [(0, '49.446')] +[2024-07-05 16:42:49,777][04594] Updated weights for policy 0, policy_version 12444 (0.0011) +[2024-07-05 16:42:53,251][04594] Updated weights for policy 0, policy_version 12454 (0.0011) +[2024-07-05 16:42:53,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 51015680. Throughput: 0: 2940.8. Samples: 7751204. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:53,942][04005] Avg episode reward: [(0, '49.973')] +[2024-07-05 16:42:56,732][04594] Updated weights for policy 0, policy_version 12464 (0.0011) +[2024-07-05 16:42:58,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 51077120. Throughput: 0: 2939.1. Samples: 7768754. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:42:58,943][04005] Avg episode reward: [(0, '49.986')] +[2024-07-05 16:43:00,210][04594] Updated weights for policy 0, policy_version 12474 (0.0011) +[2024-07-05 16:43:03,688][04594] Updated weights for policy 0, policy_version 12484 (0.0011) +[2024-07-05 16:43:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 51134464. Throughput: 0: 2945.9. Samples: 7777576. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:43:03,942][04005] Avg episode reward: [(0, '50.736')] +[2024-07-05 16:43:07,154][04594] Updated weights for policy 0, policy_version 12494 (0.0011) +[2024-07-05 16:43:08,941][04005] Fps is (10 sec: 11878.7, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 51195904. Throughput: 0: 2944.4. Samples: 7795332. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:08,942][04005] Avg episode reward: [(0, '48.510')] +[2024-07-05 16:43:10,639][04594] Updated weights for policy 0, policy_version 12504 (0.0012) +[2024-07-05 16:43:13,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 51253248. Throughput: 0: 2942.0. Samples: 7812920. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:13,942][04005] Avg episode reward: [(0, '48.920')] +[2024-07-05 16:43:14,110][04594] Updated weights for policy 0, policy_version 12514 (0.0011) +[2024-07-05 16:43:17,607][04594] Updated weights for policy 0, policy_version 12524 (0.0011) +[2024-07-05 16:43:18,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 51310592. Throughput: 0: 2951.5. Samples: 7821884. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:18,943][04005] Avg episode reward: [(0, '49.736')] +[2024-07-05 16:43:21,084][04594] Updated weights for policy 0, policy_version 12534 (0.0011) +[2024-07-05 16:43:23,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 51372032. Throughput: 0: 2944.1. Samples: 7839408. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:23,942][04005] Avg episode reward: [(0, '49.606')] +[2024-07-05 16:43:24,570][04594] Updated weights for policy 0, policy_version 12544 (0.0012) +[2024-07-05 16:43:28,044][04594] Updated weights for policy 0, policy_version 12554 (0.0011) +[2024-07-05 16:43:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 51429376. Throughput: 0: 2941.7. Samples: 7857004. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:28,942][04005] Avg episode reward: [(0, '50.619')] +[2024-07-05 16:43:31,519][04594] Updated weights for policy 0, policy_version 12564 (0.0011) +[2024-07-05 16:43:33,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 51486720. Throughput: 0: 2950.2. Samples: 7865968. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:33,942][04005] Avg episode reward: [(0, '50.127')] +[2024-07-05 16:43:35,004][04594] Updated weights for policy 0, policy_version 12574 (0.0011) +[2024-07-05 16:43:38,486][04594] Updated weights for policy 0, policy_version 12584 (0.0012) +[2024-07-05 16:43:38,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 51548160. Throughput: 0: 2940.4. Samples: 7883522. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:38,942][04005] Avg episode reward: [(0, '51.213')] +[2024-07-05 16:43:41,970][04594] Updated weights for policy 0, policy_version 12594 (0.0011) +[2024-07-05 16:43:43,942][04005] Fps is (10 sec: 11878.2, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 51605504. Throughput: 0: 2940.5. Samples: 7901078. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:43,943][04005] Avg episode reward: [(0, '51.256')] +[2024-07-05 16:43:45,447][04594] Updated weights for policy 0, policy_version 12604 (0.0011) +[2024-07-05 16:43:48,940][04594] Updated weights for policy 0, policy_version 12614 (0.0011) +[2024-07-05 16:43:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.2, 300 sec: 11760.4). Total num frames: 51666944. Throughput: 0: 2943.9. Samples: 7910052. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:43:48,942][04005] Avg episode reward: [(0, '52.539')] +[2024-07-05 16:43:52,424][04594] Updated weights for policy 0, policy_version 12624 (0.0011) +[2024-07-05 16:43:53,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 51724288. Throughput: 0: 2937.9. Samples: 7927540. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:43:53,943][04005] Avg episode reward: [(0, '51.567')] +[2024-07-05 16:43:55,909][04594] Updated weights for policy 0, policy_version 12634 (0.0011) +[2024-07-05 16:43:58,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 51781632. Throughput: 0: 2937.2. Samples: 7945092. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:43:58,942][04005] Avg episode reward: [(0, '50.460')] +[2024-07-05 16:43:59,378][04594] Updated weights for policy 0, policy_version 12644 (0.0011) +[2024-07-05 16:44:02,859][04594] Updated weights for policy 0, policy_version 12654 (0.0011) +[2024-07-05 16:44:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 51843072. Throughput: 0: 2938.2. Samples: 7954102. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:03,943][04005] Avg episode reward: [(0, '50.530')] +[2024-07-05 16:44:06,354][04594] Updated weights for policy 0, policy_version 12664 (0.0012) +[2024-07-05 16:44:08,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 51900416. Throughput: 0: 2938.0. Samples: 7971620. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:08,942][04005] Avg episode reward: [(0, '49.469')] +[2024-07-05 16:44:09,838][04594] Updated weights for policy 0, policy_version 12674 (0.0013) +[2024-07-05 16:44:13,327][04594] Updated weights for policy 0, policy_version 12684 (0.0012) +[2024-07-05 16:44:13,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.8, 300 sec: 11746.5). Total num frames: 51957760. Throughput: 0: 2936.1. Samples: 7989128. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:13,943][04005] Avg episode reward: [(0, '51.035')] +[2024-07-05 16:44:16,791][04594] Updated weights for policy 0, policy_version 12694 (0.0011) +[2024-07-05 16:44:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 52019200. Throughput: 0: 2937.0. Samples: 7998132. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:18,942][04005] Avg episode reward: [(0, '49.330')] +[2024-07-05 16:44:20,296][04594] Updated weights for policy 0, policy_version 12704 (0.0011) +[2024-07-05 16:44:23,774][04594] Updated weights for policy 0, policy_version 12714 (0.0013) +[2024-07-05 16:44:23,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11760.4). Total num frames: 52076544. Throughput: 0: 2936.3. Samples: 8015654. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:23,942][04005] Avg episode reward: [(0, '50.318')] +[2024-07-05 16:44:27,254][04594] Updated weights for policy 0, policy_version 12724 (0.0011) +[2024-07-05 16:44:28,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 52133888. Throughput: 0: 2936.8. Samples: 8033234. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:28,942][04005] Avg episode reward: [(0, '50.329')] +[2024-07-05 16:44:30,732][04594] Updated weights for policy 0, policy_version 12734 (0.0011) +[2024-07-05 16:44:33,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 52195328. Throughput: 0: 2936.4. Samples: 8042192. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:33,942][04005] Avg episode reward: [(0, '50.615')] +[2024-07-05 16:44:33,945][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000012743_52195328.pth... +[2024-07-05 16:44:34,019][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000012055_49377280.pth +[2024-07-05 16:44:34,295][04594] Updated weights for policy 0, policy_version 12744 (0.0013) +[2024-07-05 16:44:37,705][04594] Updated weights for policy 0, policy_version 12754 (0.0011) +[2024-07-05 16:44:38,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 52252672. Throughput: 0: 2937.2. Samples: 8059712. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:38,942][04005] Avg episode reward: [(0, '51.924')] +[2024-07-05 16:44:41,197][04594] Updated weights for policy 0, policy_version 12764 (0.0012) +[2024-07-05 16:44:43,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 52310016. Throughput: 0: 2939.5. Samples: 8077372. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:43,942][04005] Avg episode reward: [(0, '52.167')] +[2024-07-05 16:44:44,670][04594] Updated weights for policy 0, policy_version 12774 (0.0011) +[2024-07-05 16:44:48,146][04594] Updated weights for policy 0, policy_version 12784 (0.0011) +[2024-07-05 16:44:48,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 52371456. Throughput: 0: 2936.3. Samples: 8086234. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:48,943][04005] Avg episode reward: [(0, '51.846')] +[2024-07-05 16:44:51,628][04594] Updated weights for policy 0, policy_version 12794 (0.0011) +[2024-07-05 16:44:53,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 52428800. Throughput: 0: 2937.1. Samples: 8103788. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:53,943][04005] Avg episode reward: [(0, '52.008')] +[2024-07-05 16:44:55,105][04594] Updated weights for policy 0, policy_version 12804 (0.0011) +[2024-07-05 16:44:58,588][04594] Updated weights for policy 0, policy_version 12814 (0.0011) +[2024-07-05 16:44:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11810.1, 300 sec: 11760.4). Total num frames: 52490240. Throughput: 0: 2945.7. Samples: 8121686. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:44:58,942][04005] Avg episode reward: [(0, '52.319')] +[2024-07-05 16:45:02,076][04594] Updated weights for policy 0, policy_version 12824 (0.0012) +[2024-07-05 16:45:03,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.8, 300 sec: 11760.4). Total num frames: 52547584. Throughput: 0: 2937.9. Samples: 8130338. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:45:03,943][04005] Avg episode reward: [(0, '51.024')] +[2024-07-05 16:45:05,553][04594] Updated weights for policy 0, policy_version 12834 (0.0011) +[2024-07-05 16:45:08,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 11746.5). Total num frames: 52604928. Throughput: 0: 2938.3. Samples: 8147878. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:45:08,942][04005] Avg episode reward: [(0, '49.974')] +[2024-07-05 16:45:09,039][04594] Updated weights for policy 0, policy_version 12844 (0.0011) +[2024-07-05 16:45:12,806][04594] Updated weights for policy 0, policy_version 12854 (0.0013) +[2024-07-05 16:45:13,942][04005] Fps is (10 sec: 11059.3, 60 sec: 11673.6, 300 sec: 11732.6). Total num frames: 52658176. Throughput: 0: 2913.6. Samples: 8164348. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:45:13,943][04005] Avg episode reward: [(0, '49.731')] +[2024-07-05 16:45:17,329][04594] Updated weights for policy 0, policy_version 12864 (0.0015) +[2024-07-05 16:45:18,942][04005] Fps is (10 sec: 9830.3, 60 sec: 11400.5, 300 sec: 11691.0). Total num frames: 52703232. Throughput: 0: 2867.7. Samples: 8171240. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:45:18,943][04005] Avg episode reward: [(0, '50.278')] +[2024-07-05 16:45:21,860][04594] Updated weights for policy 0, policy_version 12874 (0.0018) +[2024-07-05 16:45:23,941][04005] Fps is (10 sec: 9011.2, 60 sec: 11195.7, 300 sec: 11635.4). Total num frames: 52748288. Throughput: 0: 2778.4. Samples: 8184738. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:45:23,942][04005] Avg episode reward: [(0, '49.749')] +[2024-07-05 16:45:26,354][04594] Updated weights for policy 0, policy_version 12884 (0.0016) +[2024-07-05 16:45:28,941][04005] Fps is (10 sec: 9011.3, 60 sec: 10991.0, 300 sec: 11593.8). Total num frames: 52793344. Throughput: 0: 2692.9. Samples: 8198550. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:45:28,943][04005] Avg episode reward: [(0, '50.911')] +[2024-07-05 16:45:30,842][04594] Updated weights for policy 0, policy_version 12894 (0.0015) +[2024-07-05 16:45:33,942][04005] Fps is (10 sec: 9011.2, 60 sec: 10717.8, 300 sec: 11552.1). Total num frames: 52838400. Throughput: 0: 2645.0. Samples: 8205258. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:45:33,943][04005] Avg episode reward: [(0, '51.265')] +[2024-07-05 16:45:35,323][04594] Updated weights for policy 0, policy_version 12904 (0.0015) +[2024-07-05 16:45:38,941][04005] Fps is (10 sec: 9420.7, 60 sec: 10581.3, 300 sec: 11510.5). Total num frames: 52887552. Throughput: 0: 2563.1. Samples: 8219128. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:45:38,942][04005] Avg episode reward: [(0, '49.719')] +[2024-07-05 16:45:39,851][04594] Updated weights for policy 0, policy_version 12914 (0.0015) +[2024-07-05 16:45:43,942][04005] Fps is (10 sec: 9420.5, 60 sec: 10376.5, 300 sec: 11468.8). Total num frames: 52932608. Throughput: 0: 2467.2. Samples: 8232712. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:45:43,943][04005] Avg episode reward: [(0, '51.237')] +[2024-07-05 16:45:44,313][04594] Updated weights for policy 0, policy_version 12924 (0.0016) +[2024-07-05 16:45:48,806][04594] Updated weights for policy 0, policy_version 12934 (0.0016) +[2024-07-05 16:45:48,942][04005] Fps is (10 sec: 9011.1, 60 sec: 10103.5, 300 sec: 11427.1). Total num frames: 52977664. Throughput: 0: 2428.8. Samples: 8239632. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:45:48,942][04005] Avg episode reward: [(0, '51.006')] +[2024-07-05 16:45:53,286][04594] Updated weights for policy 0, policy_version 12944 (0.0016) +[2024-07-05 16:45:53,942][04005] Fps is (10 sec: 9011.5, 60 sec: 9898.7, 300 sec: 11371.6). Total num frames: 53022720. Throughput: 0: 2341.5. Samples: 8253244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:45:53,942][04005] Avg episode reward: [(0, '51.285')] +[2024-07-05 16:45:57,777][04594] Updated weights for policy 0, policy_version 12954 (0.0015) +[2024-07-05 16:45:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9625.6, 300 sec: 11329.9). Total num frames: 53067776. Throughput: 0: 2277.1. Samples: 8266818. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:45:58,943][04005] Avg episode reward: [(0, '51.529')] +[2024-07-05 16:46:02,275][04594] Updated weights for policy 0, policy_version 12964 (0.0017) +[2024-07-05 16:46:03,942][04005] Fps is (10 sec: 9011.0, 60 sec: 9420.8, 300 sec: 11288.3). Total num frames: 53112832. Throughput: 0: 2278.6. Samples: 8273778. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:03,944][04005] Avg episode reward: [(0, '52.277')] +[2024-07-05 16:46:06,742][04594] Updated weights for policy 0, policy_version 12974 (0.0016) +[2024-07-05 16:46:08,942][04005] Fps is (10 sec: 9011.3, 60 sec: 9216.0, 300 sec: 11232.8). Total num frames: 53157888. Throughput: 0: 2281.6. Samples: 8287408. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:08,942][04005] Avg episode reward: [(0, '53.101')] +[2024-07-05 16:46:11,257][04594] Updated weights for policy 0, policy_version 12984 (0.0016) +[2024-07-05 16:46:13,941][04005] Fps is (10 sec: 9011.5, 60 sec: 9079.5, 300 sec: 11191.1). Total num frames: 53202944. Throughput: 0: 2280.9. Samples: 8301192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:13,942][04005] Avg episode reward: [(0, '52.675')] +[2024-07-05 16:46:15,753][04594] Updated weights for policy 0, policy_version 12994 (0.0018) +[2024-07-05 16:46:18,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.8, 300 sec: 11163.3). Total num frames: 53252096. Throughput: 0: 2279.1. Samples: 8307818. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:18,942][04005] Avg episode reward: [(0, '52.811')] +[2024-07-05 16:46:20,229][04594] Updated weights for policy 0, policy_version 13004 (0.0016) +[2024-07-05 16:46:23,942][04005] Fps is (10 sec: 9420.7, 60 sec: 9147.7, 300 sec: 11107.8). Total num frames: 53297152. Throughput: 0: 2279.6. Samples: 8321708. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:23,942][04005] Avg episode reward: [(0, '51.814')] +[2024-07-05 16:46:24,729][04594] Updated weights for policy 0, policy_version 13014 (0.0018) +[2024-07-05 16:46:28,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 11066.1). Total num frames: 53342208. Throughput: 0: 2279.0. Samples: 8335268. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:28,943][04005] Avg episode reward: [(0, '50.651')] +[2024-07-05 16:46:29,230][04594] Updated weights for policy 0, policy_version 13024 (0.0019) +[2024-07-05 16:46:33,712][04594] Updated weights for policy 0, policy_version 13034 (0.0017) +[2024-07-05 16:46:33,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 11024.5). Total num frames: 53387264. Throughput: 0: 2279.0. Samples: 8342186. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:33,943][04005] Avg episode reward: [(0, '50.588')] +[2024-07-05 16:46:34,163][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013035_53391360.pth... +[2024-07-05 16:46:34,251][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000012399_50786304.pth +[2024-07-05 16:46:38,239][04594] Updated weights for policy 0, policy_version 13044 (0.0016) +[2024-07-05 16:46:38,942][04005] Fps is (10 sec: 9010.9, 60 sec: 9079.4, 300 sec: 10968.9). Total num frames: 53432320. Throughput: 0: 2276.9. Samples: 8355706. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:38,943][04005] Avg episode reward: [(0, '50.735')] +[2024-07-05 16:46:42,740][04594] Updated weights for policy 0, policy_version 13054 (0.0016) +[2024-07-05 16:46:43,941][04005] Fps is (10 sec: 9011.4, 60 sec: 9079.5, 300 sec: 10927.3). Total num frames: 53477376. Throughput: 0: 2279.2. Samples: 8369380. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:43,942][04005] Avg episode reward: [(0, '52.144')] +[2024-07-05 16:46:47,209][04594] Updated weights for policy 0, policy_version 13064 (0.0015) +[2024-07-05 16:46:48,941][04005] Fps is (10 sec: 9011.6, 60 sec: 9079.5, 300 sec: 10885.6). Total num frames: 53522432. Throughput: 0: 2277.1. Samples: 8376248. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:48,942][04005] Avg episode reward: [(0, '51.828')] +[2024-07-05 16:46:51,690][04594] Updated weights for policy 0, policy_version 13074 (0.0017) +[2024-07-05 16:46:53,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 10830.1). Total num frames: 53567488. Throughput: 0: 2282.7. Samples: 8390128. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:53,942][04005] Avg episode reward: [(0, '50.998')] +[2024-07-05 16:46:56,200][04594] Updated weights for policy 0, policy_version 13084 (0.0018) +[2024-07-05 16:46:58,941][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.8, 300 sec: 10802.3). Total num frames: 53616640. Throughput: 0: 2278.6. Samples: 8403728. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:46:58,942][04005] Avg episode reward: [(0, '50.819')] +[2024-07-05 16:47:00,699][04594] Updated weights for policy 0, policy_version 13094 (0.0017) +[2024-07-05 16:47:03,942][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.8, 300 sec: 10760.7). Total num frames: 53661696. Throughput: 0: 2283.5. Samples: 8410578. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:03,943][04005] Avg episode reward: [(0, '51.016')] +[2024-07-05 16:47:05,198][04594] Updated weights for policy 0, policy_version 13104 (0.0017) +[2024-07-05 16:47:08,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 10705.1). Total num frames: 53706752. Throughput: 0: 2277.5. Samples: 8424196. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:08,942][04005] Avg episode reward: [(0, '51.758')] +[2024-07-05 16:47:09,721][04594] Updated weights for policy 0, policy_version 13114 (0.0019) +[2024-07-05 16:47:13,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 10663.5). Total num frames: 53751808. Throughput: 0: 2276.8. Samples: 8437724. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:13,942][04005] Avg episode reward: [(0, '52.306')] +[2024-07-05 16:47:14,220][04594] Updated weights for policy 0, policy_version 13124 (0.0015) +[2024-07-05 16:47:18,690][04594] Updated weights for policy 0, policy_version 13134 (0.0016) +[2024-07-05 16:47:18,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 10621.8). Total num frames: 53796864. Throughput: 0: 2276.4. Samples: 8444622. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:18,942][04005] Avg episode reward: [(0, '51.881')] +[2024-07-05 16:47:23,151][04594] Updated weights for policy 0, policy_version 13144 (0.0017) +[2024-07-05 16:47:23,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 10566.3). Total num frames: 53841920. Throughput: 0: 2278.8. Samples: 8458252. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:23,942][04005] Avg episode reward: [(0, '50.611')] +[2024-07-05 16:47:27,617][04594] Updated weights for policy 0, policy_version 13154 (0.0015) +[2024-07-05 16:47:28,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 10524.6). Total num frames: 53886976. Throughput: 0: 2284.8. Samples: 8472196. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:28,942][04005] Avg episode reward: [(0, '50.096')] +[2024-07-05 16:47:32,075][04594] Updated weights for policy 0, policy_version 13164 (0.0015) +[2024-07-05 16:47:33,942][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.7, 300 sec: 10496.9). Total num frames: 53936128. Throughput: 0: 2282.1. Samples: 8478942. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:33,942][04005] Avg episode reward: [(0, '49.168')] +[2024-07-05 16:47:36,545][04594] Updated weights for policy 0, policy_version 13174 (0.0017) +[2024-07-05 16:47:38,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.8, 300 sec: 10441.3). Total num frames: 53981184. Throughput: 0: 2281.1. Samples: 8492776. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:38,942][04005] Avg episode reward: [(0, '51.047')] +[2024-07-05 16:47:41,027][04594] Updated weights for policy 0, policy_version 13184 (0.0017) +[2024-07-05 16:47:43,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 10399.7). Total num frames: 54026240. Throughput: 0: 2280.4. Samples: 8506344. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:43,943][04005] Avg episode reward: [(0, '52.220')] +[2024-07-05 16:47:45,504][04594] Updated weights for policy 0, policy_version 13194 (0.0015) +[2024-07-05 16:47:48,942][04005] Fps is (10 sec: 9010.8, 60 sec: 9147.7, 300 sec: 10358.0). Total num frames: 54071296. Throughput: 0: 2282.6. Samples: 8513294. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:48,943][04005] Avg episode reward: [(0, '52.869')] +[2024-07-05 16:47:49,988][04594] Updated weights for policy 0, policy_version 13204 (0.0015) +[2024-07-05 16:47:53,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 10302.5). Total num frames: 54116352. Throughput: 0: 2282.5. Samples: 8526910. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:53,943][04005] Avg episode reward: [(0, '51.299')] +[2024-07-05 16:47:54,440][04594] Updated weights for policy 0, policy_version 13214 (0.0015) +[2024-07-05 16:47:58,892][04594] Updated weights for policy 0, policy_version 13224 (0.0016) +[2024-07-05 16:47:58,941][04005] Fps is (10 sec: 9421.3, 60 sec: 9147.7, 300 sec: 10274.7). Total num frames: 54165504. Throughput: 0: 2292.5. Samples: 8540886. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:47:58,942][04005] Avg episode reward: [(0, '49.457')] +[2024-07-05 16:48:03,341][04594] Updated weights for policy 0, policy_version 13234 (0.0015) +[2024-07-05 16:48:03,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.7, 300 sec: 10219.2). Total num frames: 54210560. Throughput: 0: 2294.8. Samples: 8547886. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:03,942][04005] Avg episode reward: [(0, '49.262')] +[2024-07-05 16:48:07,818][04594] Updated weights for policy 0, policy_version 13244 (0.0016) +[2024-07-05 16:48:08,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 10177.5). Total num frames: 54255616. Throughput: 0: 2294.4. Samples: 8561502. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:08,943][04005] Avg episode reward: [(0, '49.248')] +[2024-07-05 16:48:12,322][04594] Updated weights for policy 0, policy_version 13254 (0.0016) +[2024-07-05 16:48:13,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 10135.9). Total num frames: 54300672. Throughput: 0: 2285.7. Samples: 8575054. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:13,943][04005] Avg episode reward: [(0, '49.436')] +[2024-07-05 16:48:16,797][04594] Updated weights for policy 0, policy_version 13264 (0.0016) +[2024-07-05 16:48:18,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 10080.3). Total num frames: 54345728. Throughput: 0: 2291.3. Samples: 8582050. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:18,942][04005] Avg episode reward: [(0, '50.543')] +[2024-07-05 16:48:21,269][04594] Updated weights for policy 0, policy_version 13274 (0.0016) +[2024-07-05 16:48:23,942][04005] Fps is (10 sec: 9420.8, 60 sec: 9216.0, 300 sec: 10052.6). Total num frames: 54394880. Throughput: 0: 2290.3. Samples: 8595838. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:23,943][04005] Avg episode reward: [(0, '50.589')] +[2024-07-05 16:48:25,738][04594] Updated weights for policy 0, policy_version 13284 (0.0015) +[2024-07-05 16:48:28,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9216.0, 300 sec: 10010.9). Total num frames: 54439936. Throughput: 0: 2293.8. Samples: 8609566. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:28,943][04005] Avg episode reward: [(0, '50.215')] +[2024-07-05 16:48:30,217][04594] Updated weights for policy 0, policy_version 13294 (0.0016) +[2024-07-05 16:48:33,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9955.4). Total num frames: 54484992. Throughput: 0: 2292.7. Samples: 8616464. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:33,942][04005] Avg episode reward: [(0, '51.331')] +[2024-07-05 16:48:34,282][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013303_54489088.pth... +[2024-07-05 16:48:34,370][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000012743_52195328.pth +[2024-07-05 16:48:34,758][04594] Updated weights for policy 0, policy_version 13304 (0.0016) +[2024-07-05 16:48:38,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9913.7). Total num frames: 54530048. Throughput: 0: 2291.0. Samples: 8630006. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:38,942][04005] Avg episode reward: [(0, '50.764')] +[2024-07-05 16:48:39,230][04594] Updated weights for policy 0, policy_version 13314 (0.0016) +[2024-07-05 16:48:43,709][04594] Updated weights for policy 0, policy_version 13324 (0.0015) +[2024-07-05 16:48:43,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9858.2). Total num frames: 54575104. Throughput: 0: 2282.2. Samples: 8643586. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:43,942][04005] Avg episode reward: [(0, '51.252')] +[2024-07-05 16:48:48,226][04594] Updated weights for policy 0, policy_version 13334 (0.0016) +[2024-07-05 16:48:48,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.8, 300 sec: 9816.5). Total num frames: 54620160. Throughput: 0: 2280.6. Samples: 8650512. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:48,943][04005] Avg episode reward: [(0, '51.421')] +[2024-07-05 16:48:52,720][04594] Updated weights for policy 0, policy_version 13344 (0.0014) +[2024-07-05 16:48:53,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9774.9). Total num frames: 54665216. Throughput: 0: 2279.2. Samples: 8664064. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:53,942][04005] Avg episode reward: [(0, '52.238')] +[2024-07-05 16:48:57,180][04594] Updated weights for policy 0, policy_version 13354 (0.0016) +[2024-07-05 16:48:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9719.3). Total num frames: 54710272. Throughput: 0: 2286.4. Samples: 8677940. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:48:58,943][04005] Avg episode reward: [(0, '50.623')] +[2024-07-05 16:49:01,657][04594] Updated weights for policy 0, policy_version 13364 (0.0016) +[2024-07-05 16:49:03,941][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.7, 300 sec: 9691.6). Total num frames: 54759424. Throughput: 0: 2278.5. Samples: 8684582. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:49:03,942][04005] Avg episode reward: [(0, '49.951')] +[2024-07-05 16:49:06,162][04594] Updated weights for policy 0, policy_version 13374 (0.0016) +[2024-07-05 16:49:08,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.7, 300 sec: 9649.9). Total num frames: 54804480. Throughput: 0: 2279.0. Samples: 8698392. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:08,942][04005] Avg episode reward: [(0, '49.430')] +[2024-07-05 16:49:10,700][04594] Updated weights for policy 0, policy_version 13384 (0.0017) +[2024-07-05 16:49:13,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9594.4). Total num frames: 54849536. Throughput: 0: 2274.8. Samples: 8711932. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:13,942][04005] Avg episode reward: [(0, '48.922')] +[2024-07-05 16:49:15,230][04594] Updated weights for policy 0, policy_version 13394 (0.0015) +[2024-07-05 16:49:18,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 9552.7). Total num frames: 54894592. Throughput: 0: 2275.5. Samples: 8718860. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:49:18,943][04005] Avg episode reward: [(0, '51.042')] +[2024-07-05 16:49:19,677][04594] Updated weights for policy 0, policy_version 13404 (0.0016) +[2024-07-05 16:49:23,942][04005] Fps is (10 sec: 9010.7, 60 sec: 9079.4, 300 sec: 9511.0). Total num frames: 54939648. Throughput: 0: 2276.6. Samples: 8732456. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:49:23,943][04005] Avg episode reward: [(0, '50.725')] +[2024-07-05 16:49:24,163][04594] Updated weights for policy 0, policy_version 13414 (0.0016) +[2024-07-05 16:49:28,680][04594] Updated weights for policy 0, policy_version 13424 (0.0015) +[2024-07-05 16:49:28,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9455.5). Total num frames: 54984704. Throughput: 0: 2275.3. Samples: 8745976. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:28,943][04005] Avg episode reward: [(0, '51.129')] +[2024-07-05 16:49:33,145][04594] Updated weights for policy 0, policy_version 13434 (0.0017) +[2024-07-05 16:49:33,942][04005] Fps is (10 sec: 9011.6, 60 sec: 9079.5, 300 sec: 9413.9). Total num frames: 55029760. Throughput: 0: 2276.2. Samples: 8752942. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:33,942][04005] Avg episode reward: [(0, '52.316')] +[2024-07-05 16:49:37,608][04594] Updated weights for policy 0, policy_version 13444 (0.0016) +[2024-07-05 16:49:38,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.4, 300 sec: 9372.2). Total num frames: 55074816. Throughput: 0: 2281.6. Samples: 8766738. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:38,943][04005] Avg episode reward: [(0, '51.554')] +[2024-07-05 16:49:42,104][04594] Updated weights for policy 0, policy_version 13454 (0.0016) +[2024-07-05 16:49:43,942][04005] Fps is (10 sec: 9420.7, 60 sec: 9147.7, 300 sec: 9330.5). Total num frames: 55123968. Throughput: 0: 2279.0. Samples: 8780494. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:43,942][04005] Avg episode reward: [(0, '52.439')] +[2024-07-05 16:49:46,582][04594] Updated weights for policy 0, policy_version 13464 (0.0015) +[2024-07-05 16:49:48,941][04005] Fps is (10 sec: 9421.0, 60 sec: 9147.8, 300 sec: 9288.9). Total num frames: 55169024. Throughput: 0: 2285.1. Samples: 8787410. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:48,942][04005] Avg episode reward: [(0, '52.630')] +[2024-07-05 16:49:51,036][04594] Updated weights for policy 0, policy_version 13474 (0.0017) +[2024-07-05 16:49:53,942][04005] Fps is (10 sec: 9010.8, 60 sec: 9147.7, 300 sec: 9233.3). Total num frames: 55214080. Throughput: 0: 2280.9. Samples: 8801032. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:49:53,944][04005] Avg episode reward: [(0, '51.626')] +[2024-07-05 16:49:55,536][04594] Updated weights for policy 0, policy_version 13484 (0.0017) +[2024-07-05 16:49:58,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 9191.7). Total num frames: 55259136. Throughput: 0: 2280.7. Samples: 8814562. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:49:58,942][04005] Avg episode reward: [(0, '51.039')] +[2024-07-05 16:50:00,040][04594] Updated weights for policy 0, policy_version 13494 (0.0015) +[2024-07-05 16:50:03,941][04005] Fps is (10 sec: 9011.8, 60 sec: 9079.5, 300 sec: 9150.0). Total num frames: 55304192. Throughput: 0: 2281.4. Samples: 8821522. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:50:03,942][04005] Avg episode reward: [(0, '50.887')] +[2024-07-05 16:50:04,541][04594] Updated weights for policy 0, policy_version 13504 (0.0018) +[2024-07-05 16:50:08,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.4, 300 sec: 9122.3). Total num frames: 55349248. Throughput: 0: 2283.8. Samples: 8835224. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:50:08,942][04005] Avg episode reward: [(0, '50.760')] +[2024-07-05 16:50:08,984][04594] Updated weights for policy 0, policy_version 13514 (0.0016) +[2024-07-05 16:50:13,424][04594] Updated weights for policy 0, policy_version 13524 (0.0016) +[2024-07-05 16:50:13,941][04005] Fps is (10 sec: 9420.7, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55398400. Throughput: 0: 2292.1. Samples: 8849122. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:50:13,942][04005] Avg episode reward: [(0, '50.254')] +[2024-07-05 16:50:17,910][04594] Updated weights for policy 0, policy_version 13534 (0.0018) +[2024-07-05 16:50:18,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55443456. Throughput: 0: 2292.0. Samples: 8856080. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:50:18,942][04005] Avg episode reward: [(0, '51.375')] +[2024-07-05 16:50:22,394][04594] Updated weights for policy 0, policy_version 13544 (0.0015) +[2024-07-05 16:50:23,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9147.8, 300 sec: 9136.2). Total num frames: 55488512. Throughput: 0: 2287.0. Samples: 8869654. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:50:23,942][04005] Avg episode reward: [(0, '51.612')] +[2024-07-05 16:50:26,877][04594] Updated weights for policy 0, policy_version 13554 (0.0018) +[2024-07-05 16:50:28,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.8, 300 sec: 9136.2). Total num frames: 55533568. Throughput: 0: 2282.6. Samples: 8883212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:50:28,942][04005] Avg episode reward: [(0, '51.397')] +[2024-07-05 16:50:31,365][04594] Updated weights for policy 0, policy_version 13564 (0.0016) +[2024-07-05 16:50:33,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9122.3). Total num frames: 55578624. Throughput: 0: 2283.2. Samples: 8890154. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:50:33,942][04005] Avg episode reward: [(0, '50.968')] +[2024-07-05 16:50:34,061][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013570_55582720.pth... +[2024-07-05 16:50:34,149][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013035_53391360.pth +[2024-07-05 16:50:35,859][04594] Updated weights for policy 0, policy_version 13574 (0.0018) +[2024-07-05 16:50:38,942][04005] Fps is (10 sec: 9010.9, 60 sec: 9147.7, 300 sec: 9122.3). Total num frames: 55623680. Throughput: 0: 2282.9. Samples: 8903760. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:50:38,943][04005] Avg episode reward: [(0, '50.781')] +[2024-07-05 16:50:40,344][04594] Updated weights for policy 0, policy_version 13584 (0.0016) +[2024-07-05 16:50:43,941][04005] Fps is (10 sec: 9420.7, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55672832. Throughput: 0: 2290.3. Samples: 8917624. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:50:43,943][04005] Avg episode reward: [(0, '51.018')] +[2024-07-05 16:50:44,818][04594] Updated weights for policy 0, policy_version 13594 (0.0015) +[2024-07-05 16:50:48,941][04005] Fps is (10 sec: 9421.1, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55717888. Throughput: 0: 2289.0. Samples: 8924526. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:50:48,943][04005] Avg episode reward: [(0, '51.151')] +[2024-07-05 16:50:49,294][04594] Updated weights for policy 0, policy_version 13604 (0.0017) +[2024-07-05 16:50:53,745][04594] Updated weights for policy 0, policy_version 13614 (0.0016) +[2024-07-05 16:50:53,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.8, 300 sec: 9136.2). Total num frames: 55762944. Throughput: 0: 2289.0. Samples: 8938228. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:50:53,943][04005] Avg episode reward: [(0, '53.581')] +[2024-07-05 16:50:58,242][04594] Updated weights for policy 0, policy_version 13624 (0.0018) +[2024-07-05 16:50:58,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55808000. Throughput: 0: 2281.0. Samples: 8951766. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:50:58,943][04005] Avg episode reward: [(0, '52.737')] +[2024-07-05 16:51:02,744][04594] Updated weights for policy 0, policy_version 13634 (0.0020) +[2024-07-05 16:51:03,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55853056. Throughput: 0: 2280.4. Samples: 8958700. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:51:03,943][04005] Avg episode reward: [(0, '52.388')] +[2024-07-05 16:51:07,237][04594] Updated weights for policy 0, policy_version 13644 (0.0017) +[2024-07-05 16:51:08,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55898112. Throughput: 0: 2280.6. Samples: 8972280. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:51:08,943][04005] Avg episode reward: [(0, '51.691')] +[2024-07-05 16:51:11,731][04594] Updated weights for policy 0, policy_version 13654 (0.0015) +[2024-07-05 16:51:13,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 55943168. Throughput: 0: 2287.1. Samples: 8986132. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:13,942][04005] Avg episode reward: [(0, '50.252')] +[2024-07-05 16:51:16,246][04594] Updated weights for policy 0, policy_version 13664 (0.0016) +[2024-07-05 16:51:18,941][04005] Fps is (10 sec: 9421.0, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 55992320. Throughput: 0: 2279.7. Samples: 8992742. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:18,943][04005] Avg episode reward: [(0, '50.998')] +[2024-07-05 16:51:20,747][04594] Updated weights for policy 0, policy_version 13674 (0.0017) +[2024-07-05 16:51:23,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 56037376. Throughput: 0: 2286.1. Samples: 9006634. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:23,942][04005] Avg episode reward: [(0, '51.222')] +[2024-07-05 16:51:25,235][04594] Updated weights for policy 0, policy_version 13684 (0.0016) +[2024-07-05 16:51:28,941][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 56082432. Throughput: 0: 2280.8. Samples: 9020260. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:28,942][04005] Avg episode reward: [(0, '50.864')] +[2024-07-05 16:51:29,705][04594] Updated weights for policy 0, policy_version 13694 (0.0016) +[2024-07-05 16:51:33,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 56127488. Throughput: 0: 2280.9. Samples: 9027166. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:33,942][04005] Avg episode reward: [(0, '51.130')] +[2024-07-05 16:51:34,226][04594] Updated weights for policy 0, policy_version 13704 (0.0017) +[2024-07-05 16:51:38,720][04594] Updated weights for policy 0, policy_version 13714 (0.0016) +[2024-07-05 16:51:38,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9147.8, 300 sec: 9136.2). Total num frames: 56172544. Throughput: 0: 2277.4. Samples: 9040712. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:38,943][04005] Avg episode reward: [(0, '51.682')] +[2024-07-05 16:51:43,234][04594] Updated weights for policy 0, policy_version 13724 (0.0017) +[2024-07-05 16:51:43,941][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9136.2). Total num frames: 56217600. Throughput: 0: 2277.2. Samples: 9054238. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:43,943][04005] Avg episode reward: [(0, '52.592')] +[2024-07-05 16:51:47,761][04594] Updated weights for policy 0, policy_version 13734 (0.0016) +[2024-07-05 16:51:48,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9136.2). Total num frames: 56262656. Throughput: 0: 2277.0. Samples: 9061166. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:48,943][04005] Avg episode reward: [(0, '52.176')] +[2024-07-05 16:51:52,260][04594] Updated weights for policy 0, policy_version 13744 (0.0018) +[2024-07-05 16:51:53,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 56307712. Throughput: 0: 2275.6. Samples: 9074680. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:53,943][04005] Avg episode reward: [(0, '51.238')] +[2024-07-05 16:51:56,756][04594] Updated weights for policy 0, policy_version 13754 (0.0015) +[2024-07-05 16:51:58,941][04005] Fps is (10 sec: 9011.4, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 56352768. Throughput: 0: 2275.0. Samples: 9088508. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:51:58,942][04005] Avg episode reward: [(0, '50.152')] +[2024-07-05 16:52:01,223][04594] Updated weights for policy 0, policy_version 13764 (0.0016) +[2024-07-05 16:52:03,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9147.8, 300 sec: 9136.2). Total num frames: 56401920. Throughput: 0: 2276.4. Samples: 9095180. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:03,943][04005] Avg episode reward: [(0, '49.208')] +[2024-07-05 16:52:05,742][04594] Updated weights for policy 0, policy_version 13774 (0.0019) +[2024-07-05 16:52:08,941][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.8, 300 sec: 9136.2). Total num frames: 56446976. Throughput: 0: 2275.0. Samples: 9109008. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:08,942][04005] Avg episode reward: [(0, '49.159')] +[2024-07-05 16:52:10,252][04594] Updated weights for policy 0, policy_version 13784 (0.0016) +[2024-07-05 16:52:13,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 56492032. Throughput: 0: 2273.9. Samples: 9122584. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:52:13,943][04005] Avg episode reward: [(0, '50.414')] +[2024-07-05 16:52:14,749][04594] Updated weights for policy 0, policy_version 13794 (0.0018) +[2024-07-05 16:52:18,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9136.2). Total num frames: 56537088. Throughput: 0: 2273.8. Samples: 9129486. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:52:18,942][04005] Avg episode reward: [(0, '50.322')] +[2024-07-05 16:52:19,239][04594] Updated weights for policy 0, policy_version 13804 (0.0016) +[2024-07-05 16:52:23,727][04594] Updated weights for policy 0, policy_version 13814 (0.0015) +[2024-07-05 16:52:23,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.4, 300 sec: 9136.2). Total num frames: 56582144. Throughput: 0: 2275.3. Samples: 9143102. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 16:52:23,943][04005] Avg episode reward: [(0, '51.276')] +[2024-07-05 16:52:28,236][04594] Updated weights for policy 0, policy_version 13824 (0.0017) +[2024-07-05 16:52:28,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 56627200. Throughput: 0: 2274.8. Samples: 9156604. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:28,942][04005] Avg episode reward: [(0, '50.709')] +[2024-07-05 16:52:32,762][04594] Updated weights for policy 0, policy_version 13834 (0.0016) +[2024-07-05 16:52:33,942][04005] Fps is (10 sec: 9010.7, 60 sec: 9079.4, 300 sec: 9122.3). Total num frames: 56672256. Throughput: 0: 2273.7. Samples: 9163484. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:33,943][04005] Avg episode reward: [(0, '51.922')] +[2024-07-05 16:52:34,111][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013837_56676352.pth... +[2024-07-05 16:52:34,203][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013303_54489088.pth +[2024-07-05 16:52:37,253][04594] Updated weights for policy 0, policy_version 13844 (0.0016) +[2024-07-05 16:52:38,942][04005] Fps is (10 sec: 9010.8, 60 sec: 9079.4, 300 sec: 9122.3). Total num frames: 56717312. Throughput: 0: 2275.3. Samples: 9177068. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:38,943][04005] Avg episode reward: [(0, '51.802')] +[2024-07-05 16:52:41,730][04594] Updated weights for policy 0, policy_version 13854 (0.0016) +[2024-07-05 16:52:43,942][04005] Fps is (10 sec: 9011.7, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 56762368. Throughput: 0: 2276.3. Samples: 9190942. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:43,943][04005] Avg episode reward: [(0, '49.878')] +[2024-07-05 16:52:46,213][04594] Updated weights for policy 0, policy_version 13864 (0.0014) +[2024-07-05 16:52:48,941][04005] Fps is (10 sec: 9421.2, 60 sec: 9147.7, 300 sec: 9136.2). Total num frames: 56811520. Throughput: 0: 2274.4. Samples: 9197530. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:48,942][04005] Avg episode reward: [(0, '50.597')] +[2024-07-05 16:52:50,697][04594] Updated weights for policy 0, policy_version 13874 (0.0016) +[2024-07-05 16:52:53,942][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.7, 300 sec: 9122.3). Total num frames: 56856576. Throughput: 0: 2275.3. Samples: 9211396. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:53,943][04005] Avg episode reward: [(0, '50.482')] +[2024-07-05 16:52:55,218][04594] Updated weights for policy 0, policy_version 13884 (0.0015) +[2024-07-05 16:52:58,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9147.7, 300 sec: 9122.3). Total num frames: 56901632. Throughput: 0: 2274.9. Samples: 9224954. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:52:58,942][04005] Avg episode reward: [(0, '50.349')] +[2024-07-05 16:52:59,737][04594] Updated weights for policy 0, policy_version 13894 (0.0019) +[2024-07-05 16:53:03,942][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 56946688. Throughput: 0: 2275.2. Samples: 9231868. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:03,942][04005] Avg episode reward: [(0, '50.640')] +[2024-07-05 16:53:04,239][04594] Updated weights for policy 0, policy_version 13904 (0.0016) +[2024-07-05 16:53:08,731][04594] Updated weights for policy 0, policy_version 13914 (0.0015) +[2024-07-05 16:53:08,942][04005] Fps is (10 sec: 9010.7, 60 sec: 9079.4, 300 sec: 9122.3). Total num frames: 56991744. Throughput: 0: 2274.0. Samples: 9245434. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:08,943][04005] Avg episode reward: [(0, '50.690')] +[2024-07-05 16:53:13,233][04594] Updated weights for policy 0, policy_version 13924 (0.0017) +[2024-07-05 16:53:13,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 57036800. Throughput: 0: 2275.4. Samples: 9258996. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:13,943][04005] Avg episode reward: [(0, '50.405')] +[2024-07-05 16:53:17,753][04594] Updated weights for policy 0, policy_version 13934 (0.0016) +[2024-07-05 16:53:18,941][04005] Fps is (10 sec: 9011.7, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57081856. Throughput: 0: 2276.1. Samples: 9265908. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:18,942][04005] Avg episode reward: [(0, '50.526')] +[2024-07-05 16:53:22,240][04594] Updated weights for policy 0, policy_version 13944 (0.0017) +[2024-07-05 16:53:23,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57126912. Throughput: 0: 2274.9. Samples: 9279436. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:23,942][04005] Avg episode reward: [(0, '50.178')] +[2024-07-05 16:53:26,764][04594] Updated weights for policy 0, policy_version 13954 (0.0015) +[2024-07-05 16:53:28,942][04005] Fps is (10 sec: 9010.9, 60 sec: 9079.4, 300 sec: 9108.4). Total num frames: 57171968. Throughput: 0: 2275.0. Samples: 9293318. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:28,943][04005] Avg episode reward: [(0, '49.921')] +[2024-07-05 16:53:31,251][04594] Updated weights for policy 0, policy_version 13964 (0.0016) +[2024-07-05 16:53:33,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.6, 300 sec: 9108.4). Total num frames: 57217024. Throughput: 0: 2276.0. Samples: 9299948. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:33,943][04005] Avg episode reward: [(0, '50.283')] +[2024-07-05 16:53:35,782][04594] Updated weights for policy 0, policy_version 13974 (0.0016) +[2024-07-05 16:53:38,941][04005] Fps is (10 sec: 9421.1, 60 sec: 9147.8, 300 sec: 9122.3). Total num frames: 57266176. Throughput: 0: 2273.9. Samples: 9313722. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:38,942][04005] Avg episode reward: [(0, '49.230')] +[2024-07-05 16:53:40,309][04594] Updated weights for policy 0, policy_version 13984 (0.0017) +[2024-07-05 16:53:43,942][04005] Fps is (10 sec: 9420.7, 60 sec: 9147.7, 300 sec: 9122.3). Total num frames: 57311232. Throughput: 0: 2274.3. Samples: 9327300. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:43,943][04005] Avg episode reward: [(0, '49.725')] +[2024-07-05 16:53:44,804][04594] Updated weights for policy 0, policy_version 13994 (0.0016) +[2024-07-05 16:53:48,941][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9122.3). Total num frames: 57356288. Throughput: 0: 2271.6. Samples: 9334092. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:48,942][04005] Avg episode reward: [(0, '50.346')] +[2024-07-05 16:53:49,316][04594] Updated weights for policy 0, policy_version 14004 (0.0015) +[2024-07-05 16:53:53,806][04594] Updated weights for policy 0, policy_version 14014 (0.0016) +[2024-07-05 16:53:53,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.4, 300 sec: 9122.3). Total num frames: 57401344. Throughput: 0: 2273.9. Samples: 9347760. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:53,943][04005] Avg episode reward: [(0, '50.250')] +[2024-07-05 16:53:58,311][04594] Updated weights for policy 0, policy_version 14024 (0.0015) +[2024-07-05 16:53:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.4, 300 sec: 9108.4). Total num frames: 57446400. Throughput: 0: 2273.2. Samples: 9361288. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:53:58,942][04005] Avg episode reward: [(0, '51.446')] +[2024-07-05 16:54:02,815][04594] Updated weights for policy 0, policy_version 14034 (0.0019) +[2024-07-05 16:54:03,942][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57491456. Throughput: 0: 2272.3. Samples: 9368162. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:03,942][04005] Avg episode reward: [(0, '50.718')] +[2024-07-05 16:54:07,352][04594] Updated weights for policy 0, policy_version 14044 (0.0017) +[2024-07-05 16:54:08,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57536512. Throughput: 0: 2271.6. Samples: 9381656. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:08,942][04005] Avg episode reward: [(0, '50.418')] +[2024-07-05 16:54:11,891][04594] Updated weights for policy 0, policy_version 14054 (0.0018) +[2024-07-05 16:54:13,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57581568. Throughput: 0: 2263.3. Samples: 9395164. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:13,942][04005] Avg episode reward: [(0, '49.869')] +[2024-07-05 16:54:16,374][04594] Updated weights for policy 0, policy_version 14064 (0.0016) +[2024-07-05 16:54:18,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57626624. Throughput: 0: 2269.8. Samples: 9402088. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:18,942][04005] Avg episode reward: [(0, '49.918')] +[2024-07-05 16:54:20,881][04594] Updated weights for policy 0, policy_version 14074 (0.0016) +[2024-07-05 16:54:23,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57671680. Throughput: 0: 2264.9. Samples: 9415642. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:23,942][04005] Avg episode reward: [(0, '50.217')] +[2024-07-05 16:54:25,376][04594] Updated weights for policy 0, policy_version 14084 (0.0016) +[2024-07-05 16:54:28,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57716736. Throughput: 0: 2271.6. Samples: 9429522. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:28,943][04005] Avg episode reward: [(0, '50.691')] +[2024-07-05 16:54:29,885][04594] Updated weights for policy 0, policy_version 14094 (0.0015) +[2024-07-05 16:54:33,941][04005] Fps is (10 sec: 9420.8, 60 sec: 9147.8, 300 sec: 9122.3). Total num frames: 57765888. Throughput: 0: 2267.1. Samples: 9436112. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:33,942][04005] Avg episode reward: [(0, '51.438')] +[2024-07-05 16:54:33,947][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014103_57765888.pth... +[2024-07-05 16:54:34,034][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013570_55582720.pth +[2024-07-05 16:54:34,419][04594] Updated weights for policy 0, policy_version 14104 (0.0017) +[2024-07-05 16:54:38,894][04594] Updated weights for policy 0, policy_version 14114 (0.0018) +[2024-07-05 16:54:38,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57810944. Throughput: 0: 2270.9. Samples: 9449948. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:38,942][04005] Avg episode reward: [(0, '51.141')] +[2024-07-05 16:54:43,415][04594] Updated weights for policy 0, policy_version 14124 (0.0017) +[2024-07-05 16:54:43,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57856000. Throughput: 0: 2270.6. Samples: 9463464. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:43,943][04005] Avg episode reward: [(0, '49.823')] +[2024-07-05 16:54:47,908][04594] Updated weights for policy 0, policy_version 14134 (0.0016) +[2024-07-05 16:54:48,942][04005] Fps is (10 sec: 9011.0, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57901056. Throughput: 0: 2271.7. Samples: 9470388. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:48,943][04005] Avg episode reward: [(0, '48.254')] +[2024-07-05 16:54:52,386][04594] Updated weights for policy 0, policy_version 14144 (0.0016) +[2024-07-05 16:54:53,942][04005] Fps is (10 sec: 9010.9, 60 sec: 9079.4, 300 sec: 9108.4). Total num frames: 57946112. Throughput: 0: 2274.6. Samples: 9484012. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:53,943][04005] Avg episode reward: [(0, '47.918')] +[2024-07-05 16:54:56,911][04594] Updated weights for policy 0, policy_version 14154 (0.0015) +[2024-07-05 16:54:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 57991168. Throughput: 0: 2273.8. Samples: 9497486. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:54:58,943][04005] Avg episode reward: [(0, '47.725')] +[2024-07-05 16:55:01,427][04594] Updated weights for policy 0, policy_version 14164 (0.0015) +[2024-07-05 16:55:03,942][04005] Fps is (10 sec: 9011.5, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 58036224. Throughput: 0: 2273.8. Samples: 9504410. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:55:03,943][04005] Avg episode reward: [(0, '48.965')] +[2024-07-05 16:55:05,949][04594] Updated weights for policy 0, policy_version 14174 (0.0016) +[2024-07-05 16:55:08,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58081280. Throughput: 0: 2272.6. Samples: 9517908. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:55:08,942][04005] Avg episode reward: [(0, '49.137')] +[2024-07-05 16:55:10,463][04594] Updated weights for policy 0, policy_version 14184 (0.0015) +[2024-07-05 16:55:13,942][04005] Fps is (10 sec: 9011.0, 60 sec: 9079.4, 300 sec: 9094.5). Total num frames: 58126336. Throughput: 0: 2266.2. Samples: 9531502. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:55:13,943][04005] Avg episode reward: [(0, '49.590')] +[2024-07-05 16:55:14,979][04594] Updated weights for policy 0, policy_version 14194 (0.0017) +[2024-07-05 16:55:18,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58171392. Throughput: 0: 2271.0. Samples: 9538306. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:18,943][04005] Avg episode reward: [(0, '51.474')] +[2024-07-05 16:55:19,481][04594] Updated weights for policy 0, policy_version 14204 (0.0016) +[2024-07-05 16:55:23,942][04005] Fps is (10 sec: 9011.4, 60 sec: 9079.4, 300 sec: 9094.5). Total num frames: 58216448. Throughput: 0: 2265.6. Samples: 9551902. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:23,943][04005] Avg episode reward: [(0, '51.677')] +[2024-07-05 16:55:23,994][04594] Updated weights for policy 0, policy_version 14214 (0.0017) +[2024-07-05 16:55:28,520][04594] Updated weights for policy 0, policy_version 14224 (0.0015) +[2024-07-05 16:55:28,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58261504. Throughput: 0: 2270.5. Samples: 9565636. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:28,942][04005] Avg episode reward: [(0, '53.646')] +[2024-07-05 16:55:33,037][04594] Updated weights for policy 0, policy_version 14234 (0.0015) +[2024-07-05 16:55:33,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9079.5, 300 sec: 9108.4). Total num frames: 58310656. Throughput: 0: 2263.6. Samples: 9572250. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:33,942][04005] Avg episode reward: [(0, '52.250')] +[2024-07-05 16:55:37,537][04594] Updated weights for policy 0, policy_version 14244 (0.0015) +[2024-07-05 16:55:38,941][04005] Fps is (10 sec: 9420.8, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58355712. Throughput: 0: 2268.3. Samples: 9586084. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:38,942][04005] Avg episode reward: [(0, '50.757')] +[2024-07-05 16:55:42,047][04594] Updated weights for policy 0, policy_version 14254 (0.0015) +[2024-07-05 16:55:43,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58400768. Throughput: 0: 2270.3. Samples: 9599648. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:43,942][04005] Avg episode reward: [(0, '50.536')] +[2024-07-05 16:55:46,544][04594] Updated weights for policy 0, policy_version 14264 (0.0016) +[2024-07-05 16:55:48,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58445824. Throughput: 0: 2270.3. Samples: 9606574. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:48,942][04005] Avg episode reward: [(0, '49.929')] +[2024-07-05 16:55:51,066][04594] Updated weights for policy 0, policy_version 14274 (0.0016) +[2024-07-05 16:55:53,942][04005] Fps is (10 sec: 9010.8, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58490880. Throughput: 0: 2271.1. Samples: 9620106. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:53,943][04005] Avg episode reward: [(0, '51.306')] +[2024-07-05 16:55:55,575][04594] Updated weights for policy 0, policy_version 14284 (0.0015) +[2024-07-05 16:55:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58535936. Throughput: 0: 2269.4. Samples: 9633626. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:55:58,942][04005] Avg episode reward: [(0, '50.709')] +[2024-07-05 16:56:00,100][04594] Updated weights for policy 0, policy_version 14294 (0.0017) +[2024-07-05 16:56:03,941][04005] Fps is (10 sec: 9011.6, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58580992. Throughput: 0: 2271.7. Samples: 9640534. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:03,943][04005] Avg episode reward: [(0, '52.264')] +[2024-07-05 16:56:04,598][04594] Updated weights for policy 0, policy_version 14304 (0.0016) +[2024-07-05 16:56:08,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58626048. Throughput: 0: 2269.9. Samples: 9654046. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:08,942][04005] Avg episode reward: [(0, '52.323')] +[2024-07-05 16:56:09,098][04594] Updated weights for policy 0, policy_version 14314 (0.0016) +[2024-07-05 16:56:13,627][04594] Updated weights for policy 0, policy_version 14324 (0.0017) +[2024-07-05 16:56:13,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 58671104. Throughput: 0: 2267.9. Samples: 9667690. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:13,943][04005] Avg episode reward: [(0, '52.270')] +[2024-07-05 16:56:18,121][04594] Updated weights for policy 0, policy_version 14334 (0.0016) +[2024-07-05 16:56:18,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 58716160. Throughput: 0: 2272.0. Samples: 9674490. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:18,943][04005] Avg episode reward: [(0, '52.779')] +[2024-07-05 16:56:22,637][04594] Updated weights for policy 0, policy_version 14344 (0.0018) +[2024-07-05 16:56:23,942][04005] Fps is (10 sec: 9010.7, 60 sec: 9079.4, 300 sec: 9080.6). Total num frames: 58761216. Throughput: 0: 2266.9. Samples: 9688094. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:23,943][04005] Avg episode reward: [(0, '51.339')] +[2024-07-05 16:56:27,154][04594] Updated weights for policy 0, policy_version 14354 (0.0017) +[2024-07-05 16:56:28,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.4, 300 sec: 9080.6). Total num frames: 58806272. Throughput: 0: 2270.7. Samples: 9701828. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:28,943][04005] Avg episode reward: [(0, '51.400')] +[2024-07-05 16:56:31,695][04594] Updated weights for policy 0, policy_version 14364 (0.0019) +[2024-07-05 16:56:33,942][04005] Fps is (10 sec: 9011.6, 60 sec: 9011.2, 300 sec: 9080.6). Total num frames: 58851328. Throughput: 0: 2263.2. Samples: 9708420. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:33,943][04005] Avg episode reward: [(0, '49.805')] +[2024-07-05 16:56:33,970][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014369_58855424.pth... +[2024-07-05 16:56:34,061][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000013837_56676352.pth +[2024-07-05 16:56:36,227][04594] Updated weights for policy 0, policy_version 14374 (0.0020) +[2024-07-05 16:56:38,942][04005] Fps is (10 sec: 9010.6, 60 sec: 9011.1, 300 sec: 9080.6). Total num frames: 58896384. Throughput: 0: 2267.9. Samples: 9722160. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:38,944][04005] Avg episode reward: [(0, '50.241')] +[2024-07-05 16:56:40,758][04594] Updated weights for policy 0, policy_version 14384 (0.0017) +[2024-07-05 16:56:43,941][04005] Fps is (10 sec: 9421.1, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58945536. Throughput: 0: 2268.0. Samples: 9735686. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:43,943][04005] Avg episode reward: [(0, '51.349')] +[2024-07-05 16:56:45,308][04594] Updated weights for policy 0, policy_version 14394 (0.0017) +[2024-07-05 16:56:48,942][04005] Fps is (10 sec: 9421.4, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 58990592. Throughput: 0: 2260.9. Samples: 9742274. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:48,942][04005] Avg episode reward: [(0, '50.742')] +[2024-07-05 16:56:49,853][04594] Updated weights for policy 0, policy_version 14404 (0.0017) +[2024-07-05 16:56:53,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9094.5). Total num frames: 59035648. Throughput: 0: 2266.7. Samples: 9756048. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:53,943][04005] Avg episode reward: [(0, '51.053')] +[2024-07-05 16:56:54,404][04594] Updated weights for policy 0, policy_version 14414 (0.0016) +[2024-07-05 16:56:58,920][04594] Updated weights for policy 0, policy_version 14424 (0.0017) +[2024-07-05 16:56:58,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59080704. Throughput: 0: 2262.8. Samples: 9769514. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:56:58,942][04005] Avg episode reward: [(0, '51.638')] +[2024-07-05 16:57:03,442][04594] Updated weights for policy 0, policy_version 14434 (0.0017) +[2024-07-05 16:57:03,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59125760. Throughput: 0: 2260.4. Samples: 9776208. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:03,943][04005] Avg episode reward: [(0, '52.314')] +[2024-07-05 16:57:07,966][04594] Updated weights for policy 0, policy_version 14444 (0.0015) +[2024-07-05 16:57:08,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59170816. Throughput: 0: 2262.5. Samples: 9789906. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:08,942][04005] Avg episode reward: [(0, '51.443')] +[2024-07-05 16:57:12,492][04594] Updated weights for policy 0, policy_version 14454 (0.0016) +[2024-07-05 16:57:13,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59215872. Throughput: 0: 2257.7. Samples: 9803424. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:13,943][04005] Avg episode reward: [(0, '51.775')] +[2024-07-05 16:57:17,000][04594] Updated weights for policy 0, policy_version 14464 (0.0015) +[2024-07-05 16:57:18,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59260928. Throughput: 0: 2264.1. Samples: 9810306. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:18,942][04005] Avg episode reward: [(0, '52.149')] +[2024-07-05 16:57:21,523][04594] Updated weights for policy 0, policy_version 14474 (0.0019) +[2024-07-05 16:57:23,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.6, 300 sec: 9080.6). Total num frames: 59305984. Throughput: 0: 2259.5. Samples: 9823834. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:23,942][04005] Avg episode reward: [(0, '52.452')] +[2024-07-05 16:57:26,056][04594] Updated weights for policy 0, policy_version 14484 (0.0017) +[2024-07-05 16:57:28,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59351040. Throughput: 0: 2258.3. Samples: 9837308. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:28,943][04005] Avg episode reward: [(0, '51.523')] +[2024-07-05 16:57:30,570][04594] Updated weights for policy 0, policy_version 14494 (0.0016) +[2024-07-05 16:57:33,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9080.6). Total num frames: 59396096. Throughput: 0: 2265.0. Samples: 9844200. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:33,943][04005] Avg episode reward: [(0, '52.343')] +[2024-07-05 16:57:35,102][04594] Updated weights for policy 0, policy_version 14504 (0.0016) +[2024-07-05 16:57:38,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.6, 300 sec: 9080.6). Total num frames: 59441152. Throughput: 0: 2259.0. Samples: 9857704. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:38,942][04005] Avg episode reward: [(0, '52.272')] +[2024-07-05 16:57:39,662][04594] Updated weights for policy 0, policy_version 14514 (0.0016) +[2024-07-05 16:57:43,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59486208. Throughput: 0: 2259.3. Samples: 9871184. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:43,943][04005] Avg episode reward: [(0, '50.983')] +[2024-07-05 16:57:44,182][04594] Updated weights for policy 0, policy_version 14524 (0.0017) +[2024-07-05 16:57:48,716][04594] Updated weights for policy 0, policy_version 14534 (0.0016) +[2024-07-05 16:57:48,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59531264. Throughput: 0: 2263.8. Samples: 9878080. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:48,942][04005] Avg episode reward: [(0, '52.466')] +[2024-07-05 16:57:53,243][04594] Updated weights for policy 0, policy_version 14544 (0.0018) +[2024-07-05 16:57:53,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59576320. Throughput: 0: 2259.0. Samples: 9891560. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:53,942][04005] Avg episode reward: [(0, '50.897')] +[2024-07-05 16:57:57,775][04594] Updated weights for policy 0, policy_version 14554 (0.0017) +[2024-07-05 16:57:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59621376. Throughput: 0: 2258.4. Samples: 9905052. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:57:58,943][04005] Avg episode reward: [(0, '49.607')] +[2024-07-05 16:58:02,312][04594] Updated weights for policy 0, policy_version 14564 (0.0016) +[2024-07-05 16:58:03,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9066.8). Total num frames: 59666432. Throughput: 0: 2258.7. Samples: 9911946. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:58:03,942][04005] Avg episode reward: [(0, '50.580')] +[2024-07-05 16:58:06,826][04594] Updated weights for policy 0, policy_version 14574 (0.0019) +[2024-07-05 16:58:08,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59711488. Throughput: 0: 2257.7. Samples: 9925430. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:58:08,943][04005] Avg episode reward: [(0, '49.995')] +[2024-07-05 16:58:11,343][04594] Updated weights for policy 0, policy_version 14584 (0.0016) +[2024-07-05 16:58:13,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59756544. Throughput: 0: 2263.2. Samples: 9939150. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:58:13,943][04005] Avg episode reward: [(0, '51.003')] +[2024-07-05 16:58:15,869][04594] Updated weights for policy 0, policy_version 14594 (0.0016) +[2024-07-05 16:58:18,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59801600. Throughput: 0: 2258.6. Samples: 9945838. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:58:18,943][04005] Avg episode reward: [(0, '51.455')] +[2024-07-05 16:58:20,374][04594] Updated weights for policy 0, policy_version 14604 (0.0016) +[2024-07-05 16:58:23,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59846656. Throughput: 0: 2262.1. Samples: 9959500. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:58:23,943][04005] Avg episode reward: [(0, '50.376')] +[2024-07-05 16:58:24,889][04594] Updated weights for policy 0, policy_version 14614 (0.0018) +[2024-07-05 16:58:28,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9011.2, 300 sec: 9066.7). Total num frames: 59891712. Throughput: 0: 2267.0. Samples: 9973200. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:58:28,942][04005] Avg episode reward: [(0, '51.521')] +[2024-07-05 16:58:29,429][04594] Updated weights for policy 0, policy_version 14624 (0.0016) +[2024-07-05 16:58:33,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 9052.8). Total num frames: 59936768. Throughput: 0: 2260.4. Samples: 9979798. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:58:33,943][04005] Avg episode reward: [(0, '49.435')] +[2024-07-05 16:58:33,964][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014634_59940864.pth... +[2024-07-05 16:58:33,967][04594] Updated weights for policy 0, policy_version 14634 (0.0017) +[2024-07-05 16:58:34,050][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014103_57765888.pth +[2024-07-05 16:58:38,482][04594] Updated weights for policy 0, policy_version 14644 (0.0016) +[2024-07-05 16:58:38,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 59985920. Throughput: 0: 2267.6. Samples: 9993600. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:58:38,942][04005] Avg episode reward: [(0, '50.244')] +[2024-07-05 16:58:43,026][04594] Updated weights for policy 0, policy_version 14654 (0.0015) +[2024-07-05 16:58:43,941][04005] Fps is (10 sec: 9421.0, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 60030976. Throughput: 0: 2267.6. Samples: 10007092. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:58:43,942][04005] Avg episode reward: [(0, '49.311')] +[2024-07-05 16:58:47,546][04594] Updated weights for policy 0, policy_version 14664 (0.0018) +[2024-07-05 16:58:48,941][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 60076032. Throughput: 0: 2262.4. Samples: 10013756. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:58:48,942][04005] Avg episode reward: [(0, '48.732')] +[2024-07-05 16:58:52,049][04594] Updated weights for policy 0, policy_version 14674 (0.0016) +[2024-07-05 16:58:53,949][04005] Fps is (10 sec: 9005.1, 60 sec: 9078.5, 300 sec: 9066.5). Total num frames: 60121088. Throughput: 0: 2268.9. Samples: 10027546. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:58:53,950][04005] Avg episode reward: [(0, '50.663')] +[2024-07-05 16:58:56,571][04594] Updated weights for policy 0, policy_version 14684 (0.0017) +[2024-07-05 16:58:58,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 60166144. Throughput: 0: 2264.2. Samples: 10041038. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:58:58,942][04005] Avg episode reward: [(0, '51.273')] +[2024-07-05 16:59:01,087][04594] Updated weights for policy 0, policy_version 14694 (0.0016) +[2024-07-05 16:59:03,942][04005] Fps is (10 sec: 9017.2, 60 sec: 9079.4, 300 sec: 9066.7). Total num frames: 60211200. Throughput: 0: 2269.6. Samples: 10047968. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:59:03,942][04005] Avg episode reward: [(0, '53.057')] +[2024-07-05 16:59:05,600][04594] Updated weights for policy 0, policy_version 14704 (0.0016) +[2024-07-05 16:59:08,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 60256256. Throughput: 0: 2267.1. Samples: 10061520. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:59:08,942][04005] Avg episode reward: [(0, '53.547')] +[2024-07-05 16:59:10,116][04594] Updated weights for policy 0, policy_version 14714 (0.0017) +[2024-07-05 16:59:13,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 60301312. Throughput: 0: 2261.3. Samples: 10074958. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:59:13,942][04005] Avg episode reward: [(0, '52.893')] +[2024-07-05 16:59:14,640][04594] Updated weights for policy 0, policy_version 14724 (0.0016) +[2024-07-05 16:59:18,942][04005] Fps is (10 sec: 9010.9, 60 sec: 9079.4, 300 sec: 9066.7). Total num frames: 60346368. Throughput: 0: 2268.2. Samples: 10081868. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:59:18,943][04005] Avg episode reward: [(0, '52.129')] +[2024-07-05 16:59:19,163][04594] Updated weights for policy 0, policy_version 14734 (0.0015) +[2024-07-05 16:59:23,671][04594] Updated weights for policy 0, policy_version 14744 (0.0017) +[2024-07-05 16:59:23,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.5, 300 sec: 9066.7). Total num frames: 60391424. Throughput: 0: 2261.5. Samples: 10095368. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 16:59:23,943][04005] Avg episode reward: [(0, '51.576')] +[2024-07-05 16:59:28,217][04594] Updated weights for policy 0, policy_version 14754 (0.0015) +[2024-07-05 16:59:28,941][04005] Fps is (10 sec: 9011.5, 60 sec: 9079.5, 300 sec: 9052.9). Total num frames: 60436480. Throughput: 0: 2261.8. Samples: 10108872. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:28,942][04005] Avg episode reward: [(0, '51.010')] +[2024-07-05 16:59:32,739][04594] Updated weights for policy 0, policy_version 14764 (0.0015) +[2024-07-05 16:59:33,942][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 9052.9). Total num frames: 60481536. Throughput: 0: 2266.6. Samples: 10115754. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:33,942][04005] Avg episode reward: [(0, '51.544')] +[2024-07-05 16:59:37,354][04594] Updated weights for policy 0, policy_version 14774 (0.0015) +[2024-07-05 16:59:38,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9011.2, 300 sec: 9052.9). Total num frames: 60526592. Throughput: 0: 2255.9. Samples: 10129046. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:38,943][04005] Avg episode reward: [(0, '52.689')] +[2024-07-05 16:59:43,948][04005] Fps is (10 sec: 6959.5, 60 sec: 8669.1, 300 sec: 8983.3). Total num frames: 60551168. Throughput: 0: 2143.0. Samples: 10137486. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:43,974][04005] Avg episode reward: [(0, '50.974')] +[2024-07-05 16:59:48,948][04005] Fps is (10 sec: 2456.2, 60 sec: 7918.2, 300 sec: 8830.5). Total num frames: 60551168. Throughput: 0: 1997.2. Samples: 10137852. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:48,978][04005] Avg episode reward: [(0, '51.482')] +[2024-07-05 16:59:49,486][04594] Updated weights for policy 0, policy_version 14784 (0.0133) +[2024-07-05 16:59:53,947][04005] Fps is (10 sec: 409.6, 60 sec: 7236.5, 300 sec: 8691.7). Total num frames: 60555264. Throughput: 0: 1713.3. Samples: 10138626. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:53,982][04005] Avg episode reward: [(0, '51.352')] +[2024-07-05 16:59:58,950][04005] Fps is (10 sec: 409.5, 60 sec: 6484.6, 300 sec: 8538.9). Total num frames: 60555264. Throughput: 0: 1417.4. Samples: 10138750. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 16:59:58,987][04005] Avg episode reward: [(0, '51.352')] +[2024-07-05 17:00:03,946][04005] Fps is (10 sec: 409.6, 60 sec: 5802.3, 300 sec: 8400.2). Total num frames: 60559360. Throughput: 0: 1280.3. Samples: 10139484. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:00:03,978][04005] Avg episode reward: [(0, '51.451')] +[2024-07-05 17:00:08,947][04005] Fps is (10 sec: 819.4, 60 sec: 5119.6, 300 sec: 8261.3). Total num frames: 60563456. Throughput: 0: 996.3. Samples: 10140208. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:00:08,974][04005] Avg episode reward: [(0, '51.431')] +[2024-07-05 17:00:13,950][04005] Fps is (10 sec: 409.5, 60 sec: 4368.7, 300 sec: 8108.6). Total num frames: 60563456. Throughput: 0: 712.5. Samples: 10140938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:00:13,983][04005] Avg episode reward: [(0, '51.471')] +[2024-07-05 17:00:18,948][04005] Fps is (10 sec: 409.6, 60 sec: 3686.1, 300 sec: 7969.7). Total num frames: 60567552. Throughput: 0: 567.0. Samples: 10141272. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:18,968][04005] Avg episode reward: [(0, '51.481')] +[2024-07-05 17:00:23,952][04005] Fps is (10 sec: 409.4, 60 sec: 2935.0, 300 sec: 7816.9). Total num frames: 60567552. Throughput: 0: 287.4. Samples: 10141980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:23,994][04005] Avg episode reward: [(0, '51.481')] +[2024-07-05 17:00:28,954][04005] Fps is (10 sec: 409.4, 60 sec: 2252.4, 300 sec: 7664.1). Total num frames: 60571648. Throughput: 0: 116.4. Samples: 10142724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:28,987][04005] Avg episode reward: [(0, '51.481')] +[2024-07-05 17:00:33,954][04005] Fps is (10 sec: 819.1, 60 sec: 1569.9, 300 sec: 7525.3). Total num frames: 60575744. Throughput: 0: 117.1. Samples: 10143124. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:33,990][04005] Avg episode reward: [(0, '51.481')] +[2024-07-05 17:00:34,251][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014789_60575744.pth... +[2024-07-05 17:00:37,772][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014369_58855424.pth +[2024-07-05 17:00:38,951][04005] Fps is (10 sec: 409.7, 60 sec: 819.1, 300 sec: 7372.6). Total num frames: 60575744. Throughput: 0: 114.8. Samples: 10143794. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:38,980][04005] Avg episode reward: [(0, '51.276')] +[2024-07-05 17:00:43,948][04005] Fps is (10 sec: 409.8, 60 sec: 477.9, 300 sec: 7233.8). Total num frames: 60579840. Throughput: 0: 127.3. Samples: 10144480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:43,991][04005] Avg episode reward: [(0, '51.276')] +[2024-07-05 17:00:48,953][04005] Fps is (10 sec: 409.6, 60 sec: 477.8, 300 sec: 7081.0). Total num frames: 60579840. Throughput: 0: 118.5. Samples: 10144818. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:48,996][04005] Avg episode reward: [(0, '51.135')] +[2024-07-05 17:00:53,113][04594] Updated weights for policy 0, policy_version 14794 (0.0410) +[2024-07-05 17:00:53,941][04005] Fps is (10 sec: 2048.9, 60 sec: 751.0, 300 sec: 6997.9). Total num frames: 60600320. Throughput: 0: 169.3. Samples: 10147824. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:53,943][04005] Avg episode reward: [(0, '51.434')] +[2024-07-05 17:00:57,715][04594] Updated weights for policy 0, policy_version 14804 (0.0015) +[2024-07-05 17:00:58,942][04005] Fps is (10 sec: 6559.1, 60 sec: 1502.0, 300 sec: 6997.9). Total num frames: 60645376. Throughput: 0: 451.0. Samples: 10161232. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:00:58,943][04005] Avg episode reward: [(0, '50.936')] +[2024-07-05 17:01:02,262][04594] Updated weights for policy 0, policy_version 14814 (0.0013) +[2024-07-05 17:01:03,942][04005] Fps is (10 sec: 9011.1, 60 sec: 2184.7, 300 sec: 6997.9). Total num frames: 60690432. Throughput: 0: 594.2. Samples: 10168008. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:01:03,943][04005] Avg episode reward: [(0, '50.593')] +[2024-07-05 17:01:06,750][04594] Updated weights for policy 0, policy_version 14824 (0.0014) +[2024-07-05 17:01:08,942][04005] Fps is (10 sec: 9011.2, 60 sec: 2867.4, 300 sec: 6997.9). Total num frames: 60735488. Throughput: 0: 882.9. Samples: 10181702. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:01:08,942][04005] Avg episode reward: [(0, '51.110')] +[2024-07-05 17:01:11,249][04594] Updated weights for policy 0, policy_version 14834 (0.0014) +[2024-07-05 17:01:13,942][04005] Fps is (10 sec: 9011.3, 60 sec: 3618.4, 300 sec: 6997.9). Total num frames: 60780544. Throughput: 0: 1172.4. Samples: 10195468. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:01:13,943][04005] Avg episode reward: [(0, '51.730')] +[2024-07-05 17:01:15,756][04594] Updated weights for policy 0, policy_version 14844 (0.0016) +[2024-07-05 17:01:18,942][04005] Fps is (10 sec: 9420.8, 60 sec: 4369.5, 300 sec: 7011.8). Total num frames: 60829696. Throughput: 0: 1310.4. Samples: 10202078. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:01:18,943][04005] Avg episode reward: [(0, '51.650')] +[2024-07-05 17:01:20,257][04594] Updated weights for policy 0, policy_version 14854 (0.0016) +[2024-07-05 17:01:23,942][04005] Fps is (10 sec: 9420.8, 60 sec: 5120.8, 300 sec: 7011.8). Total num frames: 60874752. Throughput: 0: 1604.5. Samples: 10215986. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:23,953][04005] Avg episode reward: [(0, '50.597')] +[2024-07-05 17:01:24,746][04594] Updated weights for policy 0, policy_version 14864 (0.0015) +[2024-07-05 17:01:28,942][04005] Fps is (10 sec: 9010.8, 60 sec: 5803.7, 300 sec: 7011.8). Total num frames: 60919808. Throughput: 0: 1890.0. Samples: 10229524. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:28,943][04005] Avg episode reward: [(0, '51.289')] +[2024-07-05 17:01:29,267][04594] Updated weights for policy 0, policy_version 14874 (0.0017) +[2024-07-05 17:01:33,807][04594] Updated weights for policy 0, policy_version 14884 (0.0016) +[2024-07-05 17:01:33,942][04005] Fps is (10 sec: 9011.1, 60 sec: 6486.4, 300 sec: 7011.8). Total num frames: 60964864. Throughput: 0: 2036.2. Samples: 10236430. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:33,943][04005] Avg episode reward: [(0, '50.903')] +[2024-07-05 17:01:38,318][04594] Updated weights for policy 0, policy_version 14894 (0.0014) +[2024-07-05 17:01:38,942][04005] Fps is (10 sec: 9011.5, 60 sec: 7237.2, 300 sec: 6997.9). Total num frames: 61009920. Throughput: 0: 2269.2. Samples: 10249936. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:38,943][04005] Avg episode reward: [(0, '50.222')] +[2024-07-05 17:01:42,857][04594] Updated weights for policy 0, policy_version 14904 (0.0016) +[2024-07-05 17:01:43,942][04005] Fps is (10 sec: 9011.3, 60 sec: 7919.5, 300 sec: 6997.9). Total num frames: 61054976. Throughput: 0: 2270.1. Samples: 10263388. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:43,943][04005] Avg episode reward: [(0, '51.565')] +[2024-07-05 17:01:47,381][04594] Updated weights for policy 0, policy_version 14914 (0.0015) +[2024-07-05 17:01:48,942][04005] Fps is (10 sec: 9011.2, 60 sec: 8671.1, 300 sec: 6997.9). Total num frames: 61100032. Throughput: 0: 2272.8. Samples: 10270284. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:48,943][04005] Avg episode reward: [(0, '51.461')] +[2024-07-05 17:01:51,892][04594] Updated weights for policy 0, policy_version 14924 (0.0015) +[2024-07-05 17:01:53,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 6997.9). Total num frames: 61145088. Throughput: 0: 2269.6. Samples: 10283832. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:53,943][04005] Avg episode reward: [(0, '52.532')] +[2024-07-05 17:01:56,426][04594] Updated weights for policy 0, policy_version 14934 (0.0015) +[2024-07-05 17:01:58,942][04005] Fps is (10 sec: 9010.9, 60 sec: 9079.4, 300 sec: 6997.9). Total num frames: 61190144. Throughput: 0: 2262.1. Samples: 10297264. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:01:58,943][04005] Avg episode reward: [(0, '51.852')] +[2024-07-05 17:02:00,959][04594] Updated weights for policy 0, policy_version 14944 (0.0015) +[2024-07-05 17:02:03,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 6997.9). Total num frames: 61235200. Throughput: 0: 2268.9. Samples: 10304180. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:03,942][04005] Avg episode reward: [(0, '50.405')] +[2024-07-05 17:02:05,477][04594] Updated weights for policy 0, policy_version 14954 (0.0015) +[2024-07-05 17:02:08,941][04005] Fps is (10 sec: 9011.6, 60 sec: 9079.5, 300 sec: 6997.9). Total num frames: 61280256. Throughput: 0: 2260.1. Samples: 10317690. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:08,942][04005] Avg episode reward: [(0, '51.241')] +[2024-07-05 17:02:10,022][04594] Updated weights for policy 0, policy_version 14964 (0.0017) +[2024-07-05 17:02:13,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9079.4, 300 sec: 6997.9). Total num frames: 61325312. Throughput: 0: 2262.0. Samples: 10331314. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:13,943][04005] Avg episode reward: [(0, '52.037')] +[2024-07-05 17:02:14,533][04594] Updated weights for policy 0, policy_version 14974 (0.0016) +[2024-07-05 17:02:18,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61370368. Throughput: 0: 2260.0. Samples: 10338128. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:18,943][04005] Avg episode reward: [(0, '53.524')] +[2024-07-05 17:02:19,064][04594] Updated weights for policy 0, policy_version 14984 (0.0017) +[2024-07-05 17:02:23,596][04594] Updated weights for policy 0, policy_version 14994 (0.0017) +[2024-07-05 17:02:23,942][04005] Fps is (10 sec: 9011.3, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61415424. Throughput: 0: 2259.0. Samples: 10351592. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:23,943][04005] Avg episode reward: [(0, '54.633')] +[2024-07-05 17:02:24,049][04581] Saving new best policy, reward=54.633! +[2024-07-05 17:02:28,091][04594] Updated weights for policy 0, policy_version 15004 (0.0017) +[2024-07-05 17:02:28,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9011.3, 300 sec: 6997.9). Total num frames: 61460480. Throughput: 0: 2267.4. Samples: 10365422. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:28,942][04005] Avg episode reward: [(0, '54.707')] +[2024-07-05 17:02:28,995][04581] Saving new best policy, reward=54.707! +[2024-07-05 17:02:32,612][04594] Updated weights for policy 0, policy_version 15014 (0.0016) +[2024-07-05 17:02:33,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61505536. Throughput: 0: 2261.2. Samples: 10372040. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:33,943][04005] Avg episode reward: [(0, '55.424')] +[2024-07-05 17:02:33,971][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015017_61509632.pth... +[2024-07-05 17:02:34,060][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014634_59940864.pth +[2024-07-05 17:02:34,071][04581] Saving new best policy, reward=55.424! +[2024-07-05 17:02:37,140][04594] Updated weights for policy 0, policy_version 15024 (0.0016) +[2024-07-05 17:02:38,942][04005] Fps is (10 sec: 9011.0, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61550592. Throughput: 0: 2264.9. Samples: 10385752. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:38,943][04005] Avg episode reward: [(0, '54.501')] +[2024-07-05 17:02:41,716][04594] Updated weights for policy 0, policy_version 15034 (0.0018) +[2024-07-05 17:02:43,942][04005] Fps is (10 sec: 9011.2, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61595648. Throughput: 0: 2266.0. Samples: 10399234. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:43,943][04005] Avg episode reward: [(0, '54.323')] +[2024-07-05 17:02:46,273][04594] Updated weights for policy 0, policy_version 15044 (0.0015) +[2024-07-05 17:02:48,941][04005] Fps is (10 sec: 9011.4, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61640704. Throughput: 0: 2258.2. Samples: 10405800. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:48,943][04005] Avg episode reward: [(0, '54.372')] +[2024-07-05 17:02:50,788][04594] Updated weights for policy 0, policy_version 15054 (0.0015) +[2024-07-05 17:02:53,942][04005] Fps is (10 sec: 9011.0, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61685760. Throughput: 0: 2263.0. Samples: 10419526. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:53,943][04005] Avg episode reward: [(0, '52.484')] +[2024-07-05 17:02:55,327][04594] Updated weights for policy 0, policy_version 15064 (0.0016) +[2024-07-05 17:02:58,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9011.3, 300 sec: 6997.9). Total num frames: 61730816. Throughput: 0: 2261.2. Samples: 10433068. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:02:58,943][04005] Avg episode reward: [(0, '50.779')] +[2024-07-05 17:02:59,866][04594] Updated weights for policy 0, policy_version 15074 (0.0017) +[2024-07-05 17:03:03,941][04005] Fps is (10 sec: 9421.1, 60 sec: 9079.5, 300 sec: 7011.8). Total num frames: 61779968. Throughput: 0: 2256.0. Samples: 10439648. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:03,942][04005] Avg episode reward: [(0, '50.438')] +[2024-07-05 17:03:04,403][04594] Updated weights for policy 0, policy_version 15084 (0.0017) +[2024-07-05 17:03:08,941][04005] Fps is (10 sec: 9420.9, 60 sec: 9079.5, 300 sec: 7011.8). Total num frames: 61825024. Throughput: 0: 2261.6. Samples: 10453366. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:08,942][04005] Avg episode reward: [(0, '50.030')] +[2024-07-05 17:03:08,945][04594] Updated weights for policy 0, policy_version 15094 (0.0015) +[2024-07-05 17:03:13,499][04594] Updated weights for policy 0, policy_version 15104 (0.0020) +[2024-07-05 17:03:13,942][04005] Fps is (10 sec: 8601.5, 60 sec: 9011.2, 300 sec: 6997.9). Total num frames: 61865984. Throughput: 0: 2254.1. Samples: 10466858. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:13,943][04005] Avg episode reward: [(0, '49.809')] +[2024-07-05 17:03:18,035][04594] Updated weights for policy 0, policy_version 15114 (0.0016) +[2024-07-05 17:03:18,941][04005] Fps is (10 sec: 9011.2, 60 sec: 9079.5, 300 sec: 7011.8). Total num frames: 61915136. Throughput: 0: 2252.9. Samples: 10473420. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:18,943][04005] Avg episode reward: [(0, '51.183')] +[2024-07-05 17:03:22,564][04594] Updated weights for policy 0, policy_version 15124 (0.0018) +[2024-07-05 17:03:23,942][04005] Fps is (10 sec: 9420.8, 60 sec: 9079.5, 300 sec: 7011.8). Total num frames: 61960192. Throughput: 0: 2255.2. Samples: 10487234. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:23,942][04005] Avg episode reward: [(0, '49.288')] +[2024-07-05 17:03:27,099][04594] Updated weights for policy 0, policy_version 15134 (0.0016) +[2024-07-05 17:03:28,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 7011.8). Total num frames: 62005248. Throughput: 0: 2254.8. Samples: 10500698. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:28,942][04005] Avg episode reward: [(0, '49.154')] +[2024-07-05 17:03:31,635][04594] Updated weights for policy 0, policy_version 15144 (0.0015) +[2024-07-05 17:03:33,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9079.5, 300 sec: 6997.9). Total num frames: 62050304. Throughput: 0: 2256.8. Samples: 10507358. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:33,942][04005] Avg episode reward: [(0, '50.538')] +[2024-07-05 17:03:36,241][04594] Updated weights for policy 0, policy_version 15154 (0.0013) +[2024-07-05 17:03:38,941][04005] Fps is (10 sec: 8601.6, 60 sec: 9011.2, 300 sec: 6984.0). Total num frames: 62091264. Throughput: 0: 2245.3. Samples: 10520562. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:38,943][04005] Avg episode reward: [(0, '52.238')] +[2024-07-05 17:03:40,998][04594] Updated weights for policy 0, policy_version 15164 (0.0017) +[2024-07-05 17:03:43,950][04005] Fps is (10 sec: 7776.3, 60 sec: 8873.5, 300 sec: 6956.1). Total num frames: 62128128. Throughput: 0: 2194.1. Samples: 10531820. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:43,973][04005] Avg episode reward: [(0, '51.602')] +[2024-07-05 17:03:48,952][04005] Fps is (10 sec: 4092.1, 60 sec: 8190.7, 300 sec: 6817.3). Total num frames: 62132224. Throughput: 0: 2056.0. Samples: 10532186. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:48,983][04005] Avg episode reward: [(0, '51.924')] +[2024-07-05 17:03:53,953][04005] Fps is (10 sec: 409.5, 60 sec: 7439.8, 300 sec: 6664.4). Total num frames: 62132224. Throughput: 0: 1767.8. Samples: 10532936. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:03:53,994][04005] Avg episode reward: [(0, '51.924')] +[2024-07-05 17:03:58,950][04005] Fps is (10 sec: 409.7, 60 sec: 6757.6, 300 sec: 6525.7). Total num frames: 62136320. Throughput: 0: 1483.4. Samples: 10533622. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:03:58,984][04005] Avg episode reward: [(0, '51.954')] +[2024-07-05 17:04:03,947][04005] Fps is (10 sec: 409.8, 60 sec: 5938.7, 300 sec: 6373.0). Total num frames: 62136320. Throughput: 0: 1346.2. Samples: 10534004. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:04:03,990][04005] Avg episode reward: [(0, '52.266')] +[2024-07-05 17:04:08,950][04005] Fps is (10 sec: 409.6, 60 sec: 5255.9, 300 sec: 6234.1). Total num frames: 62140416. Throughput: 0: 1055.6. Samples: 10534746. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:04:08,985][04005] Avg episode reward: [(0, '52.632')] +[2024-07-05 17:04:13,952][04005] Fps is (10 sec: 409.4, 60 sec: 4573.1, 300 sec: 6081.3). Total num frames: 62140416. Throughput: 0: 772.9. Samples: 10535484. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:04:13,994][04005] Avg episode reward: [(0, '52.417')] +[2024-07-05 17:04:18,952][04005] Fps is (10 sec: 409.5, 60 sec: 3822.3, 300 sec: 5942.5). Total num frames: 62144512. Throughput: 0: 632.7. Samples: 10535834. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:04:19,027][04005] Avg episode reward: [(0, '52.659')] +[2024-07-05 17:04:23,949][04005] Fps is (10 sec: 819.5, 60 sec: 3140.0, 300 sec: 5803.7). Total num frames: 62148608. Throughput: 0: 354.5. Samples: 10536518. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:04:23,977][04005] Avg episode reward: [(0, '52.957')] +[2024-07-05 17:04:28,955][04005] Fps is (10 sec: 409.5, 60 sec: 2388.9, 300 sec: 5650.9). Total num frames: 62148608. Throughput: 0: 119.8. Samples: 10537210. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:04:28,983][04005] Avg episode reward: [(0, '52.957')] +[2024-07-05 17:04:32,037][04594] Updated weights for policy 0, policy_version 15174 (0.0440) +[2024-07-05 17:04:33,958][04005] Fps is (10 sec: 409.3, 60 sec: 1706.3, 300 sec: 5512.0). Total num frames: 62152704. Throughput: 0: 119.9. Samples: 10537582. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:04:34,012][04005] Avg episode reward: [(0, '52.632')] +[2024-07-05 17:04:34,402][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015174_62152704.pth... +[2024-07-05 17:04:38,182][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000014789_60575744.pth +[2024-07-05 17:04:38,950][04005] Fps is (10 sec: 409.8, 60 sec: 1023.9, 300 sec: 5428.9). Total num frames: 62152704. Throughput: 0: 115.1. Samples: 10538114. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:04:38,994][04005] Avg episode reward: [(0, '52.652')] +[2024-07-05 17:04:43,957][04005] Fps is (10 sec: 409.7, 60 sec: 477.8, 300 sec: 5442.7). Total num frames: 62156800. Throughput: 0: 105.7. Samples: 10538378. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:04:44,035][04005] Avg episode reward: [(0, '52.652')] +[2024-07-05 17:04:48,955][04005] Fps is (10 sec: 409.4, 60 sec: 409.6, 300 sec: 5428.8). Total num frames: 62156800. Throughput: 0: 111.1. Samples: 10539004. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:04:48,989][04005] Avg episode reward: [(0, '52.682')] +[2024-07-05 17:04:53,948][04005] Fps is (10 sec: 409.8, 60 sec: 477.9, 300 sec: 5442.8). Total num frames: 62160896. Throughput: 0: 109.3. Samples: 10539666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:04:53,976][04005] Avg episode reward: [(0, '53.149')] +[2024-07-05 17:04:58,942][04005] Fps is (10 sec: 2460.4, 60 sec: 751.0, 300 sec: 5498.4). Total num frames: 62181376. Throughput: 0: 205.0. Samples: 10544706. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:04:58,943][04005] Avg episode reward: [(0, '53.534')] +[2024-07-05 17:05:00,308][04594] Updated weights for policy 0, policy_version 15184 (0.0121) +[2024-07-05 17:05:03,941][04005] Fps is (10 sec: 6147.9, 60 sec: 1433.7, 300 sec: 5623.4). Total num frames: 62222336. Throughput: 0: 341.3. Samples: 10551188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:05:03,942][04005] Avg episode reward: [(0, '53.650')] +[2024-07-05 17:05:04,929][04594] Updated weights for policy 0, policy_version 15194 (0.0014) +[2024-07-05 17:05:08,942][04005] Fps is (10 sec: 8601.6, 60 sec: 2116.5, 300 sec: 5776.1). Total num frames: 62267392. Throughput: 0: 626.8. Samples: 10564720. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:08,942][04005] Avg episode reward: [(0, '52.590')] +[2024-07-05 17:05:09,434][04594] Updated weights for policy 0, policy_version 15204 (0.0015) +[2024-07-05 17:05:13,942][04005] Fps is (10 sec: 9011.1, 60 sec: 2867.7, 300 sec: 5915.0). Total num frames: 62312448. Throughput: 0: 916.9. Samples: 10578458. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:13,943][04005] Avg episode reward: [(0, '52.639')] +[2024-07-05 17:05:13,958][04594] Updated weights for policy 0, policy_version 15214 (0.0016) +[2024-07-05 17:05:18,472][04594] Updated weights for policy 0, policy_version 15224 (0.0017) +[2024-07-05 17:05:18,941][04005] Fps is (10 sec: 9420.9, 60 sec: 3618.7, 300 sec: 6081.7). Total num frames: 62361600. Throughput: 0: 1055.6. Samples: 10585068. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:18,942][04005] Avg episode reward: [(0, '49.682')] +[2024-07-05 17:05:22,982][04594] Updated weights for policy 0, policy_version 15234 (0.0018) +[2024-07-05 17:05:23,941][04005] Fps is (10 sec: 9420.9, 60 sec: 4301.2, 300 sec: 6220.6). Total num frames: 62406656. Throughput: 0: 1350.7. Samples: 10598888. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:23,942][04005] Avg episode reward: [(0, '49.289')] +[2024-07-05 17:05:27,514][04594] Updated weights for policy 0, policy_version 15244 (0.0015) +[2024-07-05 17:05:28,942][04005] Fps is (10 sec: 9010.8, 60 sec: 5052.7, 300 sec: 6359.4). Total num frames: 62451712. Throughput: 0: 1645.2. Samples: 10612396. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:28,943][04005] Avg episode reward: [(0, '49.613')] +[2024-07-05 17:05:32,020][04594] Updated weights for policy 0, policy_version 15254 (0.0016) +[2024-07-05 17:05:33,942][04005] Fps is (10 sec: 9011.1, 60 sec: 5735.7, 300 sec: 6512.1). Total num frames: 62496768. Throughput: 0: 1785.6. Samples: 10619334. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:33,943][04005] Avg episode reward: [(0, '49.252')] +[2024-07-05 17:05:36,554][04594] Updated weights for policy 0, policy_version 15264 (0.0017) +[2024-07-05 17:05:38,949][04005] Fps is (10 sec: 9005.5, 60 sec: 6485.4, 300 sec: 6650.7). Total num frames: 62541824. Throughput: 0: 2069.2. Samples: 10632780. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:38,955][04005] Avg episode reward: [(0, '51.048')] +[2024-07-05 17:05:41,135][04594] Updated weights for policy 0, policy_version 15274 (0.0019) +[2024-07-05 17:05:43,942][04005] Fps is (10 sec: 9011.1, 60 sec: 7169.3, 300 sec: 6803.7). Total num frames: 62586880. Throughput: 0: 2255.5. Samples: 10646204. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:43,943][04005] Avg episode reward: [(0, '51.797')] +[2024-07-05 17:05:45,724][04594] Updated weights for policy 0, policy_version 15284 (0.0015) +[2024-07-05 17:05:48,942][04005] Fps is (10 sec: 8607.4, 60 sec: 7852.1, 300 sec: 6872.9). Total num frames: 62627840. Throughput: 0: 2256.6. Samples: 10652736. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:48,943][04005] Avg episode reward: [(0, '52.530')] +[2024-07-05 17:05:50,356][04594] Updated weights for policy 0, policy_version 15294 (0.0016) +[2024-07-05 17:05:53,941][04005] Fps is (10 sec: 8601.8, 60 sec: 8534.2, 300 sec: 6873.0). Total num frames: 62672896. Throughput: 0: 2250.4. Samples: 10665986. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:53,943][04005] Avg episode reward: [(0, '53.352')] +[2024-07-05 17:05:54,987][04594] Updated weights for policy 0, policy_version 15304 (0.0015) +[2024-07-05 17:05:58,942][04005] Fps is (10 sec: 9011.2, 60 sec: 8942.9, 300 sec: 6873.0). Total num frames: 62717952. Throughput: 0: 2240.6. Samples: 10679284. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:05:58,943][04005] Avg episode reward: [(0, '52.311')] +[2024-07-05 17:05:59,606][04594] Updated weights for policy 0, policy_version 15314 (0.0015) +[2024-07-05 17:06:03,942][04005] Fps is (10 sec: 9011.1, 60 sec: 9011.2, 300 sec: 6872.9). Total num frames: 62763008. Throughput: 0: 2244.1. Samples: 10686052. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:03,943][04005] Avg episode reward: [(0, '51.519')] +[2024-07-05 17:06:04,231][04594] Updated weights for policy 0, policy_version 15324 (0.0016) +[2024-07-05 17:06:08,863][04594] Updated weights for policy 0, policy_version 15334 (0.0016) +[2024-07-05 17:06:08,941][04005] Fps is (10 sec: 9011.3, 60 sec: 9011.2, 300 sec: 6873.0). Total num frames: 62808064. Throughput: 0: 2232.2. Samples: 10699338. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:08,942][04005] Avg episode reward: [(0, '51.731')] +[2024-07-05 17:06:13,509][04594] Updated weights for policy 0, policy_version 15344 (0.0016) +[2024-07-05 17:06:13,942][04005] Fps is (10 sec: 8601.6, 60 sec: 8942.9, 300 sec: 6845.2). Total num frames: 62849024. Throughput: 0: 2226.7. Samples: 10712598. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:13,942][04005] Avg episode reward: [(0, '51.814')] +[2024-07-05 17:06:18,165][04594] Updated weights for policy 0, policy_version 15354 (0.0016) +[2024-07-05 17:06:18,941][04005] Fps is (10 sec: 8601.6, 60 sec: 8874.7, 300 sec: 6845.2). Total num frames: 62894080. Throughput: 0: 2216.4. Samples: 10719070. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:18,942][04005] Avg episode reward: [(0, '51.643')] +[2024-07-05 17:06:22,791][04594] Updated weights for policy 0, policy_version 15364 (0.0016) +[2024-07-05 17:06:23,941][04005] Fps is (10 sec: 9011.3, 60 sec: 8874.7, 300 sec: 6845.2). Total num frames: 62939136. Throughput: 0: 2212.2. Samples: 10732312. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:23,942][04005] Avg episode reward: [(0, '51.339')] +[2024-07-05 17:06:27,482][04594] Updated weights for policy 0, policy_version 15374 (0.0016) +[2024-07-05 17:06:28,941][04005] Fps is (10 sec: 9011.2, 60 sec: 8874.7, 300 sec: 6845.2). Total num frames: 62984192. Throughput: 0: 2206.7. Samples: 10745504. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:28,942][04005] Avg episode reward: [(0, '50.436')] +[2024-07-05 17:06:32,146][04594] Updated weights for policy 0, policy_version 15384 (0.0016) +[2024-07-05 17:06:33,941][04005] Fps is (10 sec: 8601.6, 60 sec: 8806.4, 300 sec: 6831.3). Total num frames: 63025152. Throughput: 0: 2205.1. Samples: 10751964. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:33,942][04005] Avg episode reward: [(0, '48.863')] +[2024-07-05 17:06:34,000][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015388_63029248.pth... +[2024-07-05 17:06:34,090][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015017_61509632.pth +[2024-07-05 17:06:36,795][04594] Updated weights for policy 0, policy_version 15394 (0.0016) +[2024-07-05 17:06:38,941][04005] Fps is (10 sec: 8601.7, 60 sec: 8807.4, 300 sec: 6831.3). Total num frames: 63070208. Throughput: 0: 2206.0. Samples: 10765258. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:38,942][04005] Avg episode reward: [(0, '49.870')] +[2024-07-05 17:06:41,421][04594] Updated weights for policy 0, policy_version 15404 (0.0016) +[2024-07-05 17:06:43,942][04005] Fps is (10 sec: 9011.1, 60 sec: 8806.4, 300 sec: 6831.3). Total num frames: 63115264. Throughput: 0: 2206.7. Samples: 10778586. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:43,943][04005] Avg episode reward: [(0, '50.293')] +[2024-07-05 17:06:46,032][04594] Updated weights for policy 0, policy_version 15414 (0.0016) +[2024-07-05 17:06:48,942][04005] Fps is (10 sec: 9010.8, 60 sec: 8874.6, 300 sec: 6831.3). Total num frames: 63160320. Throughput: 0: 2206.3. Samples: 10785334. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:48,943][04005] Avg episode reward: [(0, '51.186')] +[2024-07-05 17:06:50,663][04594] Updated weights for policy 0, policy_version 15424 (0.0015) +[2024-07-05 17:06:53,941][04005] Fps is (10 sec: 9011.3, 60 sec: 8874.7, 300 sec: 6831.3). Total num frames: 63205376. Throughput: 0: 2206.9. Samples: 10798650. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:53,943][04005] Avg episode reward: [(0, '51.154')] +[2024-07-05 17:06:55,292][04594] Updated weights for policy 0, policy_version 15434 (0.0014) +[2024-07-05 17:06:58,941][04005] Fps is (10 sec: 8601.9, 60 sec: 8806.4, 300 sec: 6817.4). Total num frames: 63246336. Throughput: 0: 2202.5. Samples: 10811710. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:06:58,942][04005] Avg episode reward: [(0, '51.685')] +[2024-07-05 17:07:00,032][04594] Updated weights for policy 0, policy_version 15444 (0.0015) +[2024-07-05 17:07:03,942][04005] Fps is (10 sec: 8601.6, 60 sec: 8806.4, 300 sec: 6817.4). Total num frames: 63291392. Throughput: 0: 2201.9. Samples: 10818154. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:07:03,943][04005] Avg episode reward: [(0, '51.077')] +[2024-07-05 17:07:04,795][04594] Updated weights for policy 0, policy_version 15454 (0.0016) +[2024-07-05 17:07:08,953][04005] Fps is (10 sec: 5728.9, 60 sec: 8259.0, 300 sec: 6706.1). Total num frames: 63303680. Throughput: 0: 2068.9. Samples: 10825432. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:07:09,010][04005] Avg episode reward: [(0, '51.513')] +[2024-07-05 17:07:13,956][04005] Fps is (10 sec: 1227.6, 60 sec: 7576.4, 300 sec: 6553.4). Total num frames: 63303680. Throughput: 0: 1780.8. Samples: 10825658. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:07:13,996][04005] Avg episode reward: [(0, '51.513')] +[2024-07-05 17:07:18,984][04005] Fps is (10 sec: 408.5, 60 sec: 6890.7, 300 sec: 6413.9). Total num frames: 63307776. Throughput: 0: 1648.3. Samples: 10826200. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:07:19,052][04005] Avg episode reward: [(0, '51.613')] +[2024-07-05 17:07:23,952][04005] Fps is (10 sec: 409.7, 60 sec: 6143.1, 300 sec: 6261.8). Total num frames: 63307776. Throughput: 0: 1365.7. Samples: 10826724. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:07:24,009][04005] Avg episode reward: [(0, '51.613')] +[2024-07-05 17:07:28,949][04005] Fps is (10 sec: 0.0, 60 sec: 5392.5, 300 sec: 6109.2). Total num frames: 63307776. Throughput: 0: 1080.1. Samples: 10827198. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:07:29,009][04005] Avg episode reward: [(0, '51.613')] +[2024-07-05 17:07:33,950][04005] Fps is (10 sec: 409.7, 60 sec: 4778.1, 300 sec: 5970.3). Total num frames: 63311872. Throughput: 0: 935.2. Samples: 10827424. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:07:34,025][04005] Avg episode reward: [(0, '51.859')] +[2024-07-05 17:07:38,952][04005] Fps is (10 sec: 409.5, 60 sec: 4027.2, 300 sec: 5817.5). Total num frames: 63311872. Throughput: 0: 651.2. Samples: 10827960. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-07-05 17:07:38,989][04005] Avg episode reward: [(0, '51.859')] +[2024-07-05 17:07:43,949][04005] Fps is (10 sec: 409.6, 60 sec: 3344.7, 300 sec: 5678.7). Total num frames: 63315968. Throughput: 0: 372.0. Samples: 10828450. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:07:44,001][04005] Avg episode reward: [(0, '51.859')] +[2024-07-05 17:07:48,952][04005] Fps is (10 sec: 409.6, 60 sec: 2593.8, 300 sec: 5526.0). Total num frames: 63315968. Throughput: 0: 234.0. Samples: 10828688. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:07:49,044][04005] Avg episode reward: [(0, '51.859')] +[2024-07-05 17:07:53,968][04005] Fps is (10 sec: 0.0, 60 sec: 1842.9, 300 sec: 5373.2). Total num frames: 63315968. Throughput: 0: 83.6. Samples: 10829192. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-07-05 17:07:54,054][04005] Avg episode reward: [(0, '51.787')] +[2024-07-05 17:07:58,950][04005] Fps is (10 sec: 409.7, 60 sec: 1228.7, 300 sec: 5220.5). Total num frames: 63320064. Throughput: 0: 89.7. Samples: 10829692. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:07:59,011][04005] Avg episode reward: [(0, '51.875')] +[2024-07-05 17:08:03,960][04005] Fps is (10 sec: 409.5, 60 sec: 477.8, 300 sec: 5067.7). Total num frames: 63320064. Throughput: 0: 83.4. Samples: 10829952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:08:04,016][04005] Avg episode reward: [(0, '52.006')] +[2024-07-05 17:08:08,962][04005] Fps is (10 sec: 409.1, 60 sec: 341.3, 300 sec: 4942.7). Total num frames: 63324160. Throughput: 0: 83.1. Samples: 10830466. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:08:09,027][04005] Avg episode reward: [(0, '52.006')] +[2024-07-05 17:08:13,952][04005] Fps is (10 sec: 409.7, 60 sec: 341.3, 300 sec: 4776.2). Total num frames: 63324160. Throughput: 0: 82.9. Samples: 10830928. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:08:14,002][04005] Avg episode reward: [(0, '52.036')] +[2024-07-05 17:08:18,953][04005] Fps is (10 sec: 0.0, 60 sec: 273.2, 300 sec: 4623.5). Total num frames: 63324160. Throughput: 0: 83.9. Samples: 10831198. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-07-05 17:08:19,003][04005] Avg episode reward: [(0, '52.036')] +[2024-07-05 17:08:23,949][04005] Fps is (10 sec: 409.7, 60 sec: 341.3, 300 sec: 4484.7). Total num frames: 63328256. Throughput: 0: 82.3. Samples: 10831662. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:08:23,993][04005] Avg episode reward: [(0, '52.036')] +[2024-07-05 17:08:28,015][04594] Updated weights for policy 0, policy_version 15464 (0.0447) +[2024-07-05 17:08:28,942][04005] Fps is (10 sec: 2049.6, 60 sec: 614.5, 300 sec: 4387.6). Total num frames: 63344640. Throughput: 0: 180.0. Samples: 10836548. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:08:28,944][04005] Avg episode reward: [(0, '52.471')] +[2024-07-05 17:08:32,765][04594] Updated weights for policy 0, policy_version 15474 (0.0016) +[2024-07-05 17:08:33,942][04005] Fps is (10 sec: 6147.8, 60 sec: 1297.2, 300 sec: 4401.5). Total num frames: 63389696. Throughput: 0: 316.8. Samples: 10842940. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:08:33,944][04005] Avg episode reward: [(0, '52.987')] +[2024-07-05 17:08:34,167][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015477_63393792.pth... +[2024-07-05 17:08:34,285][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015174_62152704.pth +[2024-07-05 17:08:37,433][04594] Updated weights for policy 0, policy_version 15484 (0.0013) +[2024-07-05 17:08:38,942][04005] Fps is (10 sec: 9011.9, 60 sec: 2048.3, 300 sec: 4429.4). Total num frames: 63434752. Throughput: 0: 597.7. Samples: 10856084. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:08:38,942][04005] Avg episode reward: [(0, '53.003')] +[2024-07-05 17:08:42,054][04594] Updated weights for policy 0, policy_version 15494 (0.0015) +[2024-07-05 17:08:43,941][04005] Fps is (10 sec: 9011.4, 60 sec: 2730.9, 300 sec: 4568.2). Total num frames: 63479808. Throughput: 0: 883.1. Samples: 10869426. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:08:43,942][04005] Avg episode reward: [(0, '52.538')] +[2024-07-05 17:08:46,674][04594] Updated weights for policy 0, policy_version 15504 (0.0015) +[2024-07-05 17:08:48,942][04005] Fps is (10 sec: 8601.6, 60 sec: 3413.8, 300 sec: 4707.1). Total num frames: 63520768. Throughput: 0: 1021.8. Samples: 10875922. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:08:48,943][04005] Avg episode reward: [(0, '52.207')] +[2024-07-05 17:08:51,276][04594] Updated weights for policy 0, policy_version 15514 (0.0015) +[2024-07-05 17:08:53,942][04005] Fps is (10 sec: 8601.4, 60 sec: 4164.8, 300 sec: 4845.9). Total num frames: 63565824. Throughput: 0: 1308.1. Samples: 10889304. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:08:53,943][04005] Avg episode reward: [(0, '51.142')] +[2024-07-05 17:08:55,874][04594] Updated weights for policy 0, policy_version 15524 (0.0015) +[2024-07-05 17:08:58,942][04005] Fps is (10 sec: 9010.9, 60 sec: 4847.5, 300 sec: 4998.6). Total num frames: 63610880. Throughput: 0: 1597.4. Samples: 10902800. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:08:58,943][04005] Avg episode reward: [(0, '51.608')] +[2024-07-05 17:09:00,464][04594] Updated weights for policy 0, policy_version 15534 (0.0016) +[2024-07-05 17:09:03,941][04005] Fps is (10 sec: 9011.3, 60 sec: 5598.9, 300 sec: 5137.5). Total num frames: 63655936. Throughput: 0: 1738.8. Samples: 10909428. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:09:03,943][04005] Avg episode reward: [(0, '52.604')] +[2024-07-05 17:09:05,079][04594] Updated weights for policy 0, policy_version 15544 (0.0016) +[2024-07-05 17:09:08,942][04005] Fps is (10 sec: 9011.5, 60 sec: 6282.5, 300 sec: 5290.3). Total num frames: 63700992. Throughput: 0: 2024.9. Samples: 10922768. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:09:08,942][04005] Avg episode reward: [(0, '52.189')] +[2024-07-05 17:09:09,716][04594] Updated weights for policy 0, policy_version 15554 (0.0017) +[2024-07-05 17:09:13,942][04005] Fps is (10 sec: 9011.0, 60 sec: 7032.4, 300 sec: 5429.1). Total num frames: 63746048. Throughput: 0: 2208.9. Samples: 10935948. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:09:13,943][04005] Avg episode reward: [(0, '53.022')] +[2024-07-05 17:09:14,371][04594] Updated weights for policy 0, policy_version 15564 (0.0017) +[2024-07-05 17:09:18,942][04005] Fps is (10 sec: 8601.6, 60 sec: 7715.3, 300 sec: 5554.0). Total num frames: 63787008. Throughput: 0: 2211.0. Samples: 10942434. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:18,943][04005] Avg episode reward: [(0, '52.312')] +[2024-07-05 17:09:19,025][04594] Updated weights for policy 0, policy_version 15574 (0.0016) +[2024-07-05 17:09:23,649][04594] Updated weights for policy 0, policy_version 15584 (0.0018) +[2024-07-05 17:09:23,941][04005] Fps is (10 sec: 8601.8, 60 sec: 8397.7, 300 sec: 5706.9). Total num frames: 63832064. Throughput: 0: 2213.7. Samples: 10955700. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:23,942][04005] Avg episode reward: [(0, '51.219')] +[2024-07-05 17:09:28,278][04594] Updated weights for policy 0, policy_version 15594 (0.0017) +[2024-07-05 17:09:28,942][04005] Fps is (10 sec: 9011.2, 60 sec: 8874.8, 300 sec: 5845.8). Total num frames: 63877120. Throughput: 0: 2212.8. Samples: 10969004. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:28,943][04005] Avg episode reward: [(0, '51.541')] +[2024-07-05 17:09:32,918][04594] Updated weights for policy 0, policy_version 15604 (0.0015) +[2024-07-05 17:09:33,943][04005] Fps is (10 sec: 9009.9, 60 sec: 8874.5, 300 sec: 5998.3). Total num frames: 63922176. Throughput: 0: 2219.0. Samples: 10975780. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:33,956][04005] Avg episode reward: [(0, '52.768')] +[2024-07-05 17:09:37,544][04594] Updated weights for policy 0, policy_version 15614 (0.0017) +[2024-07-05 17:09:38,941][04005] Fps is (10 sec: 9011.3, 60 sec: 8874.7, 300 sec: 6137.3). Total num frames: 63967232. Throughput: 0: 2216.7. Samples: 10989056. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:38,942][04005] Avg episode reward: [(0, '51.510')] +[2024-07-05 17:09:42,156][04594] Updated weights for policy 0, policy_version 15624 (0.0016) +[2024-07-05 17:09:43,942][04005] Fps is (10 sec: 8602.7, 60 sec: 8806.4, 300 sec: 6276.1). Total num frames: 64008192. Throughput: 0: 2213.2. Samples: 11002394. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:43,943][04005] Avg episode reward: [(0, '49.879')] +[2024-07-05 17:09:46,814][04594] Updated weights for policy 0, policy_version 15634 (0.0015) +[2024-07-05 17:09:48,942][04005] Fps is (10 sec: 8601.5, 60 sec: 8874.7, 300 sec: 6414.9). Total num frames: 64053248. Throughput: 0: 2209.0. Samples: 11008832. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:48,943][04005] Avg episode reward: [(0, '49.068')] +[2024-07-05 17:09:51,420][04594] Updated weights for policy 0, policy_version 15644 (0.0015) +[2024-07-05 17:09:53,941][04005] Fps is (10 sec: 9011.3, 60 sec: 8874.7, 300 sec: 6498.1). Total num frames: 64098304. Throughput: 0: 2207.6. Samples: 11022112. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:53,942][04005] Avg episode reward: [(0, '49.103')] +[2024-07-05 17:09:56,058][04594] Updated weights for policy 0, policy_version 15654 (0.0015) +[2024-07-05 17:09:58,941][04005] Fps is (10 sec: 9011.3, 60 sec: 8874.7, 300 sec: 6511.9). Total num frames: 64143360. Throughput: 0: 2209.9. Samples: 11035392. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:09:58,942][04005] Avg episode reward: [(0, '50.379')] +[2024-07-05 17:10:00,679][04594] Updated weights for policy 0, policy_version 15664 (0.0015) +[2024-07-05 17:10:03,941][04005] Fps is (10 sec: 9011.2, 60 sec: 8874.7, 300 sec: 6511.9). Total num frames: 64188416. Throughput: 0: 2214.8. Samples: 11042102. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:10:03,942][04005] Avg episode reward: [(0, '51.395')] +[2024-07-05 17:10:05,299][04594] Updated weights for policy 0, policy_version 15674 (0.0015) +[2024-07-05 17:10:08,942][04005] Fps is (10 sec: 8601.4, 60 sec: 8806.4, 300 sec: 6498.1). Total num frames: 64229376. Throughput: 0: 2212.8. Samples: 11055278. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:10:08,943][04005] Avg episode reward: [(0, '52.641')] +[2024-07-05 17:10:09,965][04594] Updated weights for policy 0, policy_version 15684 (0.0015) +[2024-07-05 17:10:13,942][04005] Fps is (10 sec: 8601.5, 60 sec: 8806.4, 300 sec: 6484.2). Total num frames: 64274432. Throughput: 0: 2209.0. Samples: 11068410. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:10:13,943][04005] Avg episode reward: [(0, '52.032')] +[2024-07-05 17:10:14,629][04594] Updated weights for policy 0, policy_version 15694 (0.0014) +[2024-07-05 17:10:18,941][04005] Fps is (10 sec: 9011.3, 60 sec: 8874.7, 300 sec: 6484.2). Total num frames: 64319488. Throughput: 0: 2207.7. Samples: 11075122. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:10:18,943][04005] Avg episode reward: [(0, '51.952')] +[2024-07-05 17:10:19,366][04594] Updated weights for policy 0, policy_version 15704 (0.0016) +[2024-07-05 17:10:23,947][04005] Fps is (10 sec: 8188.6, 60 sec: 8737.5, 300 sec: 6456.3). Total num frames: 64356352. Throughput: 0: 2193.7. Samples: 11087784. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:10:23,977][04005] Avg episode reward: [(0, '51.673')] +[2024-07-05 17:10:28,950][04005] Fps is (10 sec: 3683.9, 60 sec: 7986.3, 300 sec: 6303.5). Total num frames: 64356352. Throughput: 0: 1929.2. Samples: 11089222. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:10:29,016][04005] Avg episode reward: [(0, '52.153')] +[2024-07-05 17:10:33,954][04005] Fps is (10 sec: 409.4, 60 sec: 7303.5, 300 sec: 6164.8). Total num frames: 64360448. Throughput: 0: 1792.2. Samples: 11089498. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:10:34,022][04005] Avg episode reward: [(0, '52.542')] +[2024-07-05 17:10:34,423][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015713_64360448.pth... +[2024-07-05 17:10:38,164][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015388_63029248.pth +[2024-07-05 17:10:38,952][04005] Fps is (10 sec: 409.5, 60 sec: 6552.6, 300 sec: 6011.9). Total num frames: 64360448. Throughput: 0: 1508.8. Samples: 11090020. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:10:39,000][04005] Avg episode reward: [(0, '52.592')] +[2024-07-05 17:10:43,949][04005] Fps is (10 sec: 0.0, 60 sec: 5870.3, 300 sec: 5873.1). Total num frames: 64360448. Throughput: 0: 1225.7. Samples: 11090556. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:10:43,989][04005] Avg episode reward: [(0, '52.632')] +[2024-07-05 17:10:44,140][04594] Updated weights for policy 0, policy_version 15714 (0.0197) +[2024-07-05 17:10:48,948][04005] Fps is (10 sec: 409.7, 60 sec: 5187.8, 300 sec: 5734.3). Total num frames: 64364544. Throughput: 0: 1082.6. Samples: 11090824. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:10:48,970][04005] Avg episode reward: [(0, '52.702')] +[2024-07-05 17:10:53,952][04005] Fps is (10 sec: 409.5, 60 sec: 4436.7, 300 sec: 5581.5). Total num frames: 64364544. Throughput: 0: 802.2. Samples: 11091382. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:10:54,031][04005] Avg episode reward: [(0, '52.702')] +[2024-07-05 17:10:58,950][04005] Fps is (10 sec: 409.6, 60 sec: 3754.2, 300 sec: 5442.7). Total num frames: 64368640. Throughput: 0: 522.1. Samples: 11091908. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:10:59,002][04005] Avg episode reward: [(0, '52.702')] +[2024-07-05 17:11:03,941][04005] Fps is (10 sec: 3279.7, 60 sec: 3481.6, 300 sec: 5387.3). Total num frames: 64397312. Throughput: 0: 421.0. Samples: 11094068. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-07-05 17:11:03,943][04005] Avg episode reward: [(0, '51.561')] +[2024-07-05 17:11:04,414][04594] Updated weights for policy 0, policy_version 15724 (0.0075) +[2024-07-05 17:11:07,884][04594] Updated weights for policy 0, policy_version 15734 (0.0012) +[2024-07-05 17:11:08,941][04005] Fps is (10 sec: 9017.6, 60 sec: 3822.9, 300 sec: 5456.7). Total num frames: 64458752. Throughput: 0: 514.0. Samples: 11110910. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:11:08,942][04005] Avg episode reward: [(0, '51.553')] +[2024-07-05 17:11:11,392][04594] Updated weights for policy 0, policy_version 15744 (0.0012) +[2024-07-05 17:11:13,941][04005] Fps is (10 sec: 11878.5, 60 sec: 4027.7, 300 sec: 5498.4). Total num frames: 64516096. Throughput: 0: 868.9. Samples: 11128318. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:11:13,942][04005] Avg episode reward: [(0, '50.989')] +[2024-07-05 17:11:14,902][04594] Updated weights for policy 0, policy_version 15754 (0.0012) +[2024-07-05 17:11:18,399][04594] Updated weights for policy 0, policy_version 15764 (0.0012) +[2024-07-05 17:11:18,941][04005] Fps is (10 sec: 11468.7, 60 sec: 4232.5, 300 sec: 5540.0). Total num frames: 64573440. Throughput: 0: 1052.8. Samples: 11136862. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:18,942][04005] Avg episode reward: [(0, '51.298')] +[2024-07-05 17:11:21,886][04594] Updated weights for policy 0, policy_version 15774 (0.0011) +[2024-07-05 17:11:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 4574.2, 300 sec: 5581.7). Total num frames: 64630784. Throughput: 0: 1438.2. Samples: 11154728. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:23,942][04005] Avg episode reward: [(0, '50.336')] +[2024-07-05 17:11:25,380][04594] Updated weights for policy 0, policy_version 15784 (0.0012) +[2024-07-05 17:11:28,862][04594] Updated weights for policy 0, policy_version 15794 (0.0012) +[2024-07-05 17:11:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 5598.5, 300 sec: 5651.1). Total num frames: 64692224. Throughput: 0: 1818.0. Samples: 11172354. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:28,942][04005] Avg episode reward: [(0, '50.771')] +[2024-07-05 17:11:32,356][04594] Updated weights for policy 0, policy_version 15804 (0.0012) +[2024-07-05 17:11:33,941][04005] Fps is (10 sec: 11878.3, 60 sec: 6486.4, 300 sec: 5692.7). Total num frames: 64749568. Throughput: 0: 2001.7. Samples: 11180890. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:33,942][04005] Avg episode reward: [(0, '51.491')] +[2024-07-05 17:11:35,846][04594] Updated weights for policy 0, policy_version 15814 (0.0012) +[2024-07-05 17:11:38,942][04005] Fps is (10 sec: 11468.7, 60 sec: 7442.1, 300 sec: 5734.4). Total num frames: 64806912. Throughput: 0: 2384.3. Samples: 11198656. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:38,942][04005] Avg episode reward: [(0, '50.412')] +[2024-07-05 17:11:39,345][04594] Updated weights for policy 0, policy_version 15824 (0.0011) +[2024-07-05 17:11:42,835][04594] Updated weights for policy 0, policy_version 15834 (0.0012) +[2024-07-05 17:11:43,941][04005] Fps is (10 sec: 11878.4, 60 sec: 8465.9, 300 sec: 5789.9). Total num frames: 64868352. Throughput: 0: 2766.3. Samples: 11216370. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:43,942][04005] Avg episode reward: [(0, '50.521')] +[2024-07-05 17:11:46,330][04594] Updated weights for policy 0, policy_version 15844 (0.0011) +[2024-07-05 17:11:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 9353.5, 300 sec: 5831.6). Total num frames: 64925696. Throughput: 0: 2907.6. Samples: 11224912. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:11:48,942][04005] Avg episode reward: [(0, '50.502')] +[2024-07-05 17:11:49,816][04594] Updated weights for policy 0, policy_version 15854 (0.0011) +[2024-07-05 17:11:53,309][04594] Updated weights for policy 0, policy_version 15864 (0.0012) +[2024-07-05 17:11:53,942][04005] Fps is (10 sec: 11468.6, 60 sec: 10309.8, 300 sec: 5887.1). Total num frames: 64983040. Throughput: 0: 2928.2. Samples: 11242680. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:11:53,943][04005] Avg episode reward: [(0, '50.272')] +[2024-07-05 17:11:56,800][04594] Updated weights for policy 0, policy_version 15874 (0.0012) +[2024-07-05 17:11:58,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11265.3, 300 sec: 5942.7). Total num frames: 65044480. Throughput: 0: 2935.8. Samples: 11260428. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:11:58,942][04005] Avg episode reward: [(0, '52.633')] +[2024-07-05 17:12:00,312][04594] Updated weights for policy 0, policy_version 15884 (0.0012) +[2024-07-05 17:12:03,804][04594] Updated weights for policy 0, policy_version 15894 (0.0012) +[2024-07-05 17:12:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11741.9, 300 sec: 6095.6). Total num frames: 65101824. Throughput: 0: 2934.8. Samples: 11268926. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:12:03,942][04005] Avg episode reward: [(0, '52.778')] +[2024-07-05 17:12:07,296][04594] Updated weights for policy 0, policy_version 15904 (0.0012) +[2024-07-05 17:12:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11673.6, 300 sec: 6290.0). Total num frames: 65159168. Throughput: 0: 2928.5. Samples: 11286512. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:12:08,942][04005] Avg episode reward: [(0, '53.802')] +[2024-07-05 17:12:10,793][04594] Updated weights for policy 0, policy_version 15914 (0.0011) +[2024-07-05 17:12:13,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11741.9, 300 sec: 6485.0). Total num frames: 65220608. Throughput: 0: 2930.3. Samples: 11304220. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:12:13,943][04005] Avg episode reward: [(0, '53.616')] +[2024-07-05 17:12:14,294][04594] Updated weights for policy 0, policy_version 15924 (0.0012) +[2024-07-05 17:12:17,792][04594] Updated weights for policy 0, policy_version 15934 (0.0012) +[2024-07-05 17:12:18,941][04005] Fps is (10 sec: 11878.3, 60 sec: 11741.9, 300 sec: 6678.8). Total num frames: 65277952. Throughput: 0: 2932.2. Samples: 11312838. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:12:18,943][04005] Avg episode reward: [(0, '53.807')] +[2024-07-05 17:12:21,290][04594] Updated weights for policy 0, policy_version 15944 (0.0012) +[2024-07-05 17:12:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11741.9, 300 sec: 6873.1). Total num frames: 65335296. Throughput: 0: 2924.5. Samples: 11330260. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:12:23,942][04005] Avg episode reward: [(0, '53.207')] +[2024-07-05 17:12:24,846][04594] Updated weights for policy 0, policy_version 15954 (0.0012) +[2024-07-05 17:12:28,552][04594] Updated weights for policy 0, policy_version 15964 (0.0012) +[2024-07-05 17:12:28,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11673.6, 300 sec: 7053.6). Total num frames: 65392640. Throughput: 0: 2911.5. Samples: 11347386. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:12:28,942][04005] Avg episode reward: [(0, '52.467')] +[2024-07-05 17:12:32,226][04594] Updated weights for policy 0, policy_version 15974 (0.0012) +[2024-07-05 17:12:33,942][04005] Fps is (10 sec: 11059.1, 60 sec: 11605.3, 300 sec: 7234.2). Total num frames: 65445888. Throughput: 0: 2906.1. Samples: 11355686. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:12:33,942][04005] Avg episode reward: [(0, '50.129')] +[2024-07-05 17:12:34,100][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015979_65449984.pth... +[2024-07-05 17:12:34,174][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015477_63393792.pth +[2024-07-05 17:12:35,920][04594] Updated weights for policy 0, policy_version 15984 (0.0012) +[2024-07-05 17:12:38,941][04005] Fps is (10 sec: 11059.3, 60 sec: 11605.4, 300 sec: 7414.6). Total num frames: 65503232. Throughput: 0: 2882.1. Samples: 11372374. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:12:38,942][04005] Avg episode reward: [(0, '49.886')] +[2024-07-05 17:12:39,493][04594] Updated weights for policy 0, policy_version 15994 (0.0012) +[2024-07-05 17:12:43,031][04594] Updated weights for policy 0, policy_version 16004 (0.0011) +[2024-07-05 17:12:43,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 7609.1). Total num frames: 65560576. Throughput: 0: 2872.0. Samples: 11389668. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:12:43,942][04005] Avg episode reward: [(0, '50.522')] +[2024-07-05 17:12:46,555][04594] Updated weights for policy 0, policy_version 16014 (0.0012) +[2024-07-05 17:12:48,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 7803.4). Total num frames: 65617920. Throughput: 0: 2880.7. Samples: 11398556. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:12:48,943][04005] Avg episode reward: [(0, '52.184')] +[2024-07-05 17:12:50,138][04594] Updated weights for policy 0, policy_version 16024 (0.0012) +[2024-07-05 17:12:53,749][04594] Updated weights for policy 0, policy_version 16034 (0.0011) +[2024-07-05 17:12:53,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 7983.9). Total num frames: 65675264. Throughput: 0: 2867.7. Samples: 11415560. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:12:53,943][04005] Avg episode reward: [(0, '52.362')] +[2024-07-05 17:12:57,273][04594] Updated weights for policy 0, policy_version 16044 (0.0012) +[2024-07-05 17:12:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 8178.4). Total num frames: 65732608. Throughput: 0: 2857.0. Samples: 11432786. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:12:58,942][04005] Avg episode reward: [(0, '52.552')] +[2024-07-05 17:13:00,800][04594] Updated weights for policy 0, policy_version 16054 (0.0012) +[2024-07-05 17:13:03,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11468.8, 300 sec: 8359.2). Total num frames: 65789952. Throughput: 0: 2863.6. Samples: 11441702. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:13:03,942][04005] Avg episode reward: [(0, '51.249')] +[2024-07-05 17:13:04,318][04594] Updated weights for policy 0, policy_version 16064 (0.0012) +[2024-07-05 17:13:07,880][04594] Updated weights for policy 0, policy_version 16074 (0.0012) +[2024-07-05 17:13:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 8553.2). Total num frames: 65847296. Throughput: 0: 2863.1. Samples: 11459100. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:13:08,942][04005] Avg episode reward: [(0, '49.092')] +[2024-07-05 17:13:11,555][04594] Updated weights for policy 0, policy_version 16084 (0.0012) +[2024-07-05 17:13:13,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11400.5, 300 sec: 8747.7). Total num frames: 65904640. Throughput: 0: 2854.4. Samples: 11475834. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:13:13,943][04005] Avg episode reward: [(0, '48.952')] +[2024-07-05 17:13:15,087][04594] Updated weights for policy 0, policy_version 16094 (0.0012) +[2024-07-05 17:13:18,637][04594] Updated weights for policy 0, policy_version 16104 (0.0012) +[2024-07-05 17:13:18,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11400.5, 300 sec: 8928.1). Total num frames: 65961984. Throughput: 0: 2867.8. Samples: 11484738. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:13:18,943][04005] Avg episode reward: [(0, '50.410')] +[2024-07-05 17:13:22,345][04594] Updated weights for policy 0, policy_version 16114 (0.0013) +[2024-07-05 17:13:23,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11400.5, 300 sec: 9066.8). Total num frames: 66019328. Throughput: 0: 2867.7. Samples: 11501422. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:13:23,942][04005] Avg episode reward: [(0, '52.836')] +[2024-07-05 17:13:26,021][04594] Updated weights for policy 0, policy_version 16124 (0.0012) +[2024-07-05 17:13:28,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11400.6, 300 sec: 9108.4). Total num frames: 66076672. Throughput: 0: 2868.3. Samples: 11518742. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:28,942][04005] Avg episode reward: [(0, '52.864')] +[2024-07-05 17:13:29,542][04594] Updated weights for policy 0, policy_version 16134 (0.0012) +[2024-07-05 17:13:33,116][04594] Updated weights for policy 0, policy_version 16144 (0.0012) +[2024-07-05 17:13:33,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11468.8, 300 sec: 9150.0). Total num frames: 66134016. Throughput: 0: 2858.6. Samples: 11527194. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:33,943][04005] Avg episode reward: [(0, '54.369')] +[2024-07-05 17:13:36,671][04594] Updated weights for policy 0, policy_version 16154 (0.0012) +[2024-07-05 17:13:38,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11468.8, 300 sec: 9191.7). Total num frames: 66191360. Throughput: 0: 2866.0. Samples: 11544528. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:38,943][04005] Avg episode reward: [(0, '53.155')] +[2024-07-05 17:13:40,219][04594] Updated weights for policy 0, policy_version 16164 (0.0012) +[2024-07-05 17:13:43,847][04594] Updated weights for policy 0, policy_version 16174 (0.0012) +[2024-07-05 17:13:43,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11468.8, 300 sec: 9247.2). Total num frames: 66248704. Throughput: 0: 2865.7. Samples: 11561742. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:43,942][04005] Avg episode reward: [(0, '52.922')] +[2024-07-05 17:13:47,368][04594] Updated weights for policy 0, policy_version 16184 (0.0012) +[2024-07-05 17:13:48,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11468.8, 300 sec: 9288.9). Total num frames: 66306048. Throughput: 0: 2857.0. Samples: 11570266. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:48,942][04005] Avg episode reward: [(0, '52.796')] +[2024-07-05 17:13:50,999][04594] Updated weights for policy 0, policy_version 16194 (0.0011) +[2024-07-05 17:13:53,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11468.8, 300 sec: 9330.6). Total num frames: 66363392. Throughput: 0: 2852.2. Samples: 11587448. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:53,943][04005] Avg episode reward: [(0, '52.305')] +[2024-07-05 17:13:54,562][04594] Updated weights for policy 0, policy_version 16204 (0.0011) +[2024-07-05 17:13:58,439][04594] Updated weights for policy 0, policy_version 16214 (0.0013) +[2024-07-05 17:13:58,942][04005] Fps is (10 sec: 11059.2, 60 sec: 11400.5, 300 sec: 9358.3). Total num frames: 66416640. Throughput: 0: 2845.9. Samples: 11603900. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:13:58,943][04005] Avg episode reward: [(0, '53.819')] +[2024-07-05 17:14:02,077][04594] Updated weights for policy 0, policy_version 16224 (0.0012) +[2024-07-05 17:14:03,941][04005] Fps is (10 sec: 11059.2, 60 sec: 11400.5, 300 sec: 9400.0). Total num frames: 66473984. Throughput: 0: 2831.3. Samples: 11612148. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:14:03,942][04005] Avg episode reward: [(0, '53.363')] +[2024-07-05 17:14:05,756][04594] Updated weights for policy 0, policy_version 16234 (0.0012) +[2024-07-05 17:14:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11400.5, 300 sec: 9441.6). Total num frames: 66531328. Throughput: 0: 2840.2. Samples: 11629230. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2024-07-05 17:14:08,942][04005] Avg episode reward: [(0, '54.270')] +[2024-07-05 17:14:09,295][04594] Updated weights for policy 0, policy_version 16244 (0.0012) +[2024-07-05 17:14:12,908][04594] Updated weights for policy 0, policy_version 16254 (0.0012) +[2024-07-05 17:14:13,941][04005] Fps is (10 sec: 11059.2, 60 sec: 11332.3, 300 sec: 9483.3). Total num frames: 66584576. Throughput: 0: 2832.8. Samples: 11646216. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:13,942][04005] Avg episode reward: [(0, '53.072')] +[2024-07-05 17:14:16,476][04594] Updated weights for policy 0, policy_version 16264 (0.0011) +[2024-07-05 17:14:18,941][04005] Fps is (10 sec: 11059.3, 60 sec: 11332.3, 300 sec: 9524.9). Total num frames: 66641920. Throughput: 0: 2836.6. Samples: 11654842. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:18,942][04005] Avg episode reward: [(0, '52.267')] +[2024-07-05 17:14:20,031][04594] Updated weights for policy 0, policy_version 16274 (0.0012) +[2024-07-05 17:14:23,566][04594] Updated weights for policy 0, policy_version 16284 (0.0012) +[2024-07-05 17:14:23,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11400.5, 300 sec: 9580.5). Total num frames: 66703360. Throughput: 0: 2837.7. Samples: 11672226. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:23,942][04005] Avg episode reward: [(0, '51.190')] +[2024-07-05 17:14:27,112][04594] Updated weights for policy 0, policy_version 16294 (0.0012) +[2024-07-05 17:14:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11400.5, 300 sec: 9622.2). Total num frames: 66760704. Throughput: 0: 2841.9. Samples: 11689626. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:28,942][04005] Avg episode reward: [(0, '51.173')] +[2024-07-05 17:14:30,631][04594] Updated weights for policy 0, policy_version 16304 (0.0012) +[2024-07-05 17:14:33,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11400.6, 300 sec: 9663.8). Total num frames: 66818048. Throughput: 0: 2841.4. Samples: 11698130. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:33,942][04005] Avg episode reward: [(0, '51.412')] +[2024-07-05 17:14:34,140][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000016314_66822144.pth... +[2024-07-05 17:14:34,142][04594] Updated weights for policy 0, policy_version 16314 (0.0012) +[2024-07-05 17:14:34,214][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015713_64360448.pth +[2024-07-05 17:14:37,673][04594] Updated weights for policy 0, policy_version 16324 (0.0011) +[2024-07-05 17:14:38,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11400.6, 300 sec: 9719.3). Total num frames: 66875392. Throughput: 0: 2847.1. Samples: 11715568. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:38,942][04005] Avg episode reward: [(0, '49.609')] +[2024-07-05 17:14:41,208][04594] Updated weights for policy 0, policy_version 16334 (0.0012) +[2024-07-05 17:14:43,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11400.5, 300 sec: 9761.0). Total num frames: 66932736. Throughput: 0: 2867.2. Samples: 11732926. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:43,942][04005] Avg episode reward: [(0, '48.476')] +[2024-07-05 17:14:44,737][04594] Updated weights for policy 0, policy_version 16344 (0.0012) +[2024-07-05 17:14:48,260][04594] Updated weights for policy 0, policy_version 16354 (0.0012) +[2024-07-05 17:14:48,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11400.5, 300 sec: 9802.6). Total num frames: 66990080. Throughput: 0: 2882.2. Samples: 11741848. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:48,942][04005] Avg episode reward: [(0, '48.407')] +[2024-07-05 17:14:51,789][04594] Updated weights for policy 0, policy_version 16364 (0.0012) +[2024-07-05 17:14:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11468.8, 300 sec: 9858.2). Total num frames: 67051520. Throughput: 0: 2889.0. Samples: 11759236. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:53,942][04005] Avg episode reward: [(0, '46.632')] +[2024-07-05 17:14:55,323][04594] Updated weights for policy 0, policy_version 16374 (0.0012) +[2024-07-05 17:14:58,839][04594] Updated weights for policy 0, policy_version 16384 (0.0012) +[2024-07-05 17:14:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11537.1, 300 sec: 9899.8). Total num frames: 67108864. Throughput: 0: 2898.8. Samples: 11776664. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:14:58,942][04005] Avg episode reward: [(0, '49.165')] +[2024-07-05 17:15:02,372][04594] Updated weights for policy 0, policy_version 16394 (0.0011) +[2024-07-05 17:15:03,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 9955.4). Total num frames: 67166208. Throughput: 0: 2895.5. Samples: 11785140. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:03,942][04005] Avg episode reward: [(0, '46.908')] +[2024-07-05 17:15:05,893][04594] Updated weights for policy 0, policy_version 16404 (0.0011) +[2024-07-05 17:15:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 9997.0). Total num frames: 67223552. Throughput: 0: 2896.6. Samples: 11802574. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:08,943][04005] Avg episode reward: [(0, '48.900')] +[2024-07-05 17:15:09,466][04594] Updated weights for policy 0, policy_version 16414 (0.0012) +[2024-07-05 17:15:12,961][04594] Updated weights for policy 0, policy_version 16424 (0.0012) +[2024-07-05 17:15:13,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 10038.7). Total num frames: 67280896. Throughput: 0: 2896.8. Samples: 11819984. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:13,943][04005] Avg episode reward: [(0, '48.531')] +[2024-07-05 17:15:16,471][04594] Updated weights for policy 0, policy_version 16434 (0.0011) +[2024-07-05 17:15:18,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11673.6, 300 sec: 10122.1). Total num frames: 67342336. Throughput: 0: 2906.5. Samples: 11828922. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:18,942][04005] Avg episode reward: [(0, '48.714')] +[2024-07-05 17:15:19,998][04594] Updated weights for policy 0, policy_version 16444 (0.0012) +[2024-07-05 17:15:23,514][04594] Updated weights for policy 0, policy_version 16454 (0.0012) +[2024-07-05 17:15:23,942][04005] Fps is (10 sec: 11878.4, 60 sec: 11605.3, 300 sec: 10316.6). Total num frames: 67399680. Throughput: 0: 2906.5. Samples: 11846362. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:23,942][04005] Avg episode reward: [(0, '49.089')] +[2024-07-05 17:15:27,039][04594] Updated weights for policy 0, policy_version 16464 (0.0012) +[2024-07-05 17:15:28,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11605.3, 300 sec: 10497.2). Total num frames: 67457024. Throughput: 0: 2907.7. Samples: 11863772. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:15:28,942][04005] Avg episode reward: [(0, '49.819')] +[2024-07-05 17:15:30,565][04594] Updated weights for policy 0, policy_version 16474 (0.0012) +[2024-07-05 17:15:33,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 10691.6). Total num frames: 67514368. Throughput: 0: 2901.8. Samples: 11872430. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:15:33,942][04005] Avg episode reward: [(0, '52.236')] +[2024-07-05 17:15:34,090][04594] Updated weights for policy 0, policy_version 16484 (0.0012) +[2024-07-05 17:15:37,638][04594] Updated weights for policy 0, policy_version 16494 (0.0012) +[2024-07-05 17:15:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 10885.9). Total num frames: 67571712. Throughput: 0: 2900.5. Samples: 11889760. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:15:38,942][04005] Avg episode reward: [(0, '51.859')] +[2024-07-05 17:15:41,183][04594] Updated weights for policy 0, policy_version 16504 (0.0012) +[2024-07-05 17:15:43,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11605.3, 300 sec: 11066.4). Total num frames: 67629056. Throughput: 0: 2898.6. Samples: 11907100. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:15:43,943][04005] Avg episode reward: [(0, '52.390')] +[2024-07-05 17:15:44,698][04594] Updated weights for policy 0, policy_version 16514 (0.0012) +[2024-07-05 17:15:48,219][04594] Updated weights for policy 0, policy_version 16524 (0.0012) +[2024-07-05 17:15:48,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11673.6, 300 sec: 11274.8). Total num frames: 67690496. Throughput: 0: 2907.8. Samples: 11915992. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2024-07-05 17:15:48,942][04005] Avg episode reward: [(0, '52.316')] +[2024-07-05 17:15:51,758][04594] Updated weights for policy 0, policy_version 16534 (0.0012) +[2024-07-05 17:15:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.3, 300 sec: 11455.2). Total num frames: 67747840. Throughput: 0: 2907.2. Samples: 11933398. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:53,942][04005] Avg episode reward: [(0, '51.584')] +[2024-07-05 17:15:55,300][04594] Updated weights for policy 0, policy_version 16544 (0.0012) +[2024-07-05 17:15:58,827][04594] Updated weights for policy 0, policy_version 16554 (0.0012) +[2024-07-05 17:15:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 67805184. Throughput: 0: 2905.8. Samples: 11950746. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:15:58,942][04005] Avg episode reward: [(0, '52.465')] +[2024-07-05 17:16:02,358][04594] Updated weights for policy 0, policy_version 16564 (0.0012) +[2024-07-05 17:16:03,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 67862528. Throughput: 0: 2896.0. Samples: 11959244. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:03,942][04005] Avg episode reward: [(0, '52.668')] +[2024-07-05 17:16:05,864][04594] Updated weights for policy 0, policy_version 16574 (0.0011) +[2024-07-05 17:16:08,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 67919872. Throughput: 0: 2899.1. Samples: 11976820. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:08,942][04005] Avg episode reward: [(0, '52.930')] +[2024-07-05 17:16:09,390][04594] Updated weights for policy 0, policy_version 16584 (0.0012) +[2024-07-05 17:16:12,922][04594] Updated weights for policy 0, policy_version 16594 (0.0012) +[2024-07-05 17:16:13,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 67977216. Throughput: 0: 2898.2. Samples: 11994190. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:13,942][04005] Avg episode reward: [(0, '52.417')] +[2024-07-05 17:16:16,455][04594] Updated weights for policy 0, policy_version 16604 (0.0012) +[2024-07-05 17:16:18,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 68038656. Throughput: 0: 2901.6. Samples: 12003002. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:18,942][04005] Avg episode reward: [(0, '50.826')] +[2024-07-05 17:16:19,993][04594] Updated weights for policy 0, policy_version 16614 (0.0012) +[2024-07-05 17:16:23,524][04594] Updated weights for policy 0, policy_version 16624 (0.0012) +[2024-07-05 17:16:23,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.4, 300 sec: 11538.2). Total num frames: 68096000. Throughput: 0: 2902.7. Samples: 12020380. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:23,942][04005] Avg episode reward: [(0, '51.293')] +[2024-07-05 17:16:27,062][04594] Updated weights for policy 0, policy_version 16634 (0.0012) +[2024-07-05 17:16:28,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 68153344. Throughput: 0: 2903.4. Samples: 12037752. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:28,942][04005] Avg episode reward: [(0, '51.947')] +[2024-07-05 17:16:30,574][04594] Updated weights for policy 0, policy_version 16644 (0.0011) +[2024-07-05 17:16:33,942][04005] Fps is (10 sec: 11468.5, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 68210688. Throughput: 0: 2897.7. Samples: 12046390. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:33,943][04005] Avg episode reward: [(0, '51.119')] +[2024-07-05 17:16:34,089][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000016654_68214784.pth... +[2024-07-05 17:16:34,091][04594] Updated weights for policy 0, policy_version 16654 (0.0013) +[2024-07-05 17:16:34,164][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000015979_65449984.pth +[2024-07-05 17:16:37,631][04594] Updated weights for policy 0, policy_version 16664 (0.0012) +[2024-07-05 17:16:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11524.3). Total num frames: 68268032. Throughput: 0: 2897.5. Samples: 12063786. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:38,942][04005] Avg episode reward: [(0, '51.911')] +[2024-07-05 17:16:41,180][04594] Updated weights for policy 0, policy_version 16674 (0.0012) +[2024-07-05 17:16:43,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11605.4, 300 sec: 11524.3). Total num frames: 68325376. Throughput: 0: 2896.8. Samples: 12081100. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:43,942][04005] Avg episode reward: [(0, '50.072')] +[2024-07-05 17:16:44,710][04594] Updated weights for policy 0, policy_version 16684 (0.0012) +[2024-07-05 17:16:48,225][04594] Updated weights for policy 0, policy_version 16694 (0.0012) +[2024-07-05 17:16:48,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 68386816. Throughput: 0: 2905.9. Samples: 12090010. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:48,943][04005] Avg episode reward: [(0, '52.223')] +[2024-07-05 17:16:51,766][04594] Updated weights for policy 0, policy_version 16704 (0.0012) +[2024-07-05 17:16:53,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.3, 300 sec: 11524.3). Total num frames: 68444160. Throughput: 0: 2901.9. Samples: 12107406. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:53,942][04005] Avg episode reward: [(0, '51.167')] +[2024-07-05 17:16:55,304][04594] Updated weights for policy 0, policy_version 16714 (0.0012) +[2024-07-05 17:16:58,835][04594] Updated weights for policy 0, policy_version 16724 (0.0011) +[2024-07-05 17:16:58,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11524.3). Total num frames: 68501504. Throughput: 0: 2901.4. Samples: 12124752. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:16:58,942][04005] Avg episode reward: [(0, '51.364')] +[2024-07-05 17:17:02,365][04594] Updated weights for policy 0, policy_version 16734 (0.0012) +[2024-07-05 17:17:03,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11605.3, 300 sec: 11524.3). Total num frames: 68558848. Throughput: 0: 2895.1. Samples: 12133280. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:03,943][04005] Avg episode reward: [(0, '51.426')] +[2024-07-05 17:17:05,896][04594] Updated weights for policy 0, policy_version 16744 (0.0012) +[2024-07-05 17:17:08,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11510.5). Total num frames: 68616192. Throughput: 0: 2895.5. Samples: 12150676. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:08,943][04005] Avg episode reward: [(0, '52.885')] +[2024-07-05 17:17:09,463][04594] Updated weights for policy 0, policy_version 16754 (0.0011) +[2024-07-05 17:17:12,970][04594] Updated weights for policy 0, policy_version 16764 (0.0012) +[2024-07-05 17:17:13,941][04005] Fps is (10 sec: 11469.0, 60 sec: 11605.4, 300 sec: 11510.5). Total num frames: 68673536. Throughput: 0: 2895.9. Samples: 12168068. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:13,942][04005] Avg episode reward: [(0, '53.670')] +[2024-07-05 17:17:16,493][04594] Updated weights for policy 0, policy_version 16774 (0.0011) +[2024-07-05 17:17:18,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11537.1, 300 sec: 11510.5). Total num frames: 68730880. Throughput: 0: 2902.1. Samples: 12176986. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:18,942][04005] Avg episode reward: [(0, '54.096')] +[2024-07-05 17:17:20,025][04594] Updated weights for policy 0, policy_version 16784 (0.0012) +[2024-07-05 17:17:23,546][04594] Updated weights for policy 0, policy_version 16794 (0.0012) +[2024-07-05 17:17:23,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11605.3, 300 sec: 11524.3). Total num frames: 68792320. Throughput: 0: 2902.0. Samples: 12194376. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:23,942][04005] Avg episode reward: [(0, '53.991')] +[2024-07-05 17:17:27,100][04594] Updated weights for policy 0, policy_version 16804 (0.0012) +[2024-07-05 17:17:28,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11605.4, 300 sec: 11538.2). Total num frames: 68849664. Throughput: 0: 2903.1. Samples: 12211740. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:28,942][04005] Avg episode reward: [(0, '53.230')] +[2024-07-05 17:17:30,650][04594] Updated weights for policy 0, policy_version 16814 (0.0011) +[2024-07-05 17:17:33,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.4, 300 sec: 11538.2). Total num frames: 68907008. Throughput: 0: 2893.8. Samples: 12220232. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:33,942][04005] Avg episode reward: [(0, '52.495')] +[2024-07-05 17:17:34,178][04594] Updated weights for policy 0, policy_version 16824 (0.0012) +[2024-07-05 17:17:37,704][04594] Updated weights for policy 0, policy_version 16834 (0.0012) +[2024-07-05 17:17:38,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 68964352. Throughput: 0: 2894.8. Samples: 12237672. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:38,942][04005] Avg episode reward: [(0, '50.856')] +[2024-07-05 17:17:41,252][04594] Updated weights for policy 0, policy_version 16844 (0.0012) +[2024-07-05 17:17:43,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11538.2). Total num frames: 69021696. Throughput: 0: 2895.7. Samples: 12255060. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:43,942][04005] Avg episode reward: [(0, '50.207')] +[2024-07-05 17:17:44,770][04594] Updated weights for policy 0, policy_version 16854 (0.0012) +[2024-07-05 17:17:48,304][04594] Updated weights for policy 0, policy_version 16864 (0.0012) +[2024-07-05 17:17:48,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 11538.2). Total num frames: 69079040. Throughput: 0: 2904.1. Samples: 12263962. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:48,942][04005] Avg episode reward: [(0, '50.457')] +[2024-07-05 17:17:51,840][04594] Updated weights for policy 0, policy_version 16874 (0.0012) +[2024-07-05 17:17:53,942][04005] Fps is (10 sec: 11468.6, 60 sec: 11537.0, 300 sec: 11538.2). Total num frames: 69136384. Throughput: 0: 2904.1. Samples: 12281360. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:53,942][04005] Avg episode reward: [(0, '51.209')] +[2024-07-05 17:17:55,374][04594] Updated weights for policy 0, policy_version 16884 (0.0011) +[2024-07-05 17:17:58,905][04594] Updated weights for policy 0, policy_version 16894 (0.0012) +[2024-07-05 17:17:58,941][04005] Fps is (10 sec: 11878.4, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 69197824. Throughput: 0: 2904.5. Samples: 12298770. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:17:58,942][04005] Avg episode reward: [(0, '51.114')] +[2024-07-05 17:18:02,449][04594] Updated weights for policy 0, policy_version 16904 (0.0012) +[2024-07-05 17:18:03,942][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 69255168. Throughput: 0: 2894.7. Samples: 12307246. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:03,943][04005] Avg episode reward: [(0, '51.540')] +[2024-07-05 17:18:05,981][04594] Updated weights for policy 0, policy_version 16914 (0.0012) +[2024-07-05 17:18:08,942][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 69312512. Throughput: 0: 2894.3. Samples: 12324620. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:08,942][04005] Avg episode reward: [(0, '52.000')] +[2024-07-05 17:18:09,524][04594] Updated weights for policy 0, policy_version 16924 (0.0012) +[2024-07-05 17:18:13,063][04594] Updated weights for policy 0, policy_version 16934 (0.0012) +[2024-07-05 17:18:13,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 69369856. Throughput: 0: 2895.6. Samples: 12342040. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:13,943][04005] Avg episode reward: [(0, '52.676')] +[2024-07-05 17:18:16,588][04594] Updated weights for policy 0, policy_version 16944 (0.0011) +[2024-07-05 17:18:18,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.3, 300 sec: 11552.1). Total num frames: 69427200. Throughput: 0: 2902.2. Samples: 12350830. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:18,942][04005] Avg episode reward: [(0, '50.470')] +[2024-07-05 17:18:20,110][04594] Updated weights for policy 0, policy_version 16954 (0.0012) +[2024-07-05 17:18:23,639][04594] Updated weights for policy 0, policy_version 16964 (0.0012) +[2024-07-05 17:18:23,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11537.1, 300 sec: 11552.1). Total num frames: 69484544. Throughput: 0: 2902.5. Samples: 12368286. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:23,942][04005] Avg episode reward: [(0, '49.573')] +[2024-07-05 17:18:27,166][04594] Updated weights for policy 0, policy_version 16974 (0.0012) +[2024-07-05 17:18:28,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.3, 300 sec: 11566.0). Total num frames: 69545984. Throughput: 0: 2903.6. Samples: 12385720. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:28,942][04005] Avg episode reward: [(0, '49.038')] +[2024-07-05 17:18:30,705][04594] Updated weights for policy 0, policy_version 16984 (0.0012) +[2024-07-05 17:18:33,942][04005] Fps is (10 sec: 11878.3, 60 sec: 11605.3, 300 sec: 11566.0). Total num frames: 69603328. Throughput: 0: 2894.5. Samples: 12394216. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:33,943][04005] Avg episode reward: [(0, '50.829')] +[2024-07-05 17:18:33,946][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000016993_69603328.pth... +[2024-07-05 17:18:34,022][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000016314_66822144.pth +[2024-07-05 17:18:34,295][04594] Updated weights for policy 0, policy_version 16994 (0.0012) +[2024-07-05 17:18:37,779][04594] Updated weights for policy 0, policy_version 17004 (0.0012) +[2024-07-05 17:18:38,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11566.0). Total num frames: 69660672. Throughput: 0: 2894.3. Samples: 12411602. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:38,942][04005] Avg episode reward: [(0, '52.609')] +[2024-07-05 17:18:41,320][04594] Updated weights for policy 0, policy_version 17014 (0.0012) +[2024-07-05 17:18:43,942][04005] Fps is (10 sec: 11468.8, 60 sec: 11605.3, 300 sec: 11566.0). Total num frames: 69718016. Throughput: 0: 2893.4. Samples: 12428972. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:43,943][04005] Avg episode reward: [(0, '52.285')] +[2024-07-05 17:18:44,844][04594] Updated weights for policy 0, policy_version 17024 (0.0012) +[2024-07-05 17:18:48,379][04594] Updated weights for policy 0, policy_version 17034 (0.0012) +[2024-07-05 17:18:48,941][04005] Fps is (10 sec: 11468.7, 60 sec: 11605.3, 300 sec: 11566.0). Total num frames: 69775360. Throughput: 0: 2897.6. Samples: 12437638. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:48,942][04005] Avg episode reward: [(0, '52.558')] +[2024-07-05 17:18:51,913][04594] Updated weights for policy 0, policy_version 17044 (0.0012) +[2024-07-05 17:18:53,941][04005] Fps is (10 sec: 11468.9, 60 sec: 11605.4, 300 sec: 11579.9). Total num frames: 69832704. Throughput: 0: 2897.6. Samples: 12455010. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:53,942][04005] Avg episode reward: [(0, '51.826')] +[2024-07-05 17:18:55,439][04594] Updated weights for policy 0, policy_version 17054 (0.0012) +[2024-07-05 17:18:58,941][04005] Fps is (10 sec: 11468.8, 60 sec: 11537.1, 300 sec: 11579.9). Total num frames: 69890048. Throughput: 0: 2897.7. Samples: 12472438. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2024-07-05 17:18:58,942][04005] Avg episode reward: [(0, '53.025')] +[2024-07-05 17:18:58,970][04594] Updated weights for policy 0, policy_version 17064 (0.0012) +[2024-07-05 17:19:02,510][04594] Updated weights for policy 0, policy_version 17074 (0.0012) +[2024-07-05 17:19:03,941][04005] Fps is (10 sec: 11878.5, 60 sec: 11605.4, 300 sec: 11593.8). Total num frames: 69951488. Throughput: 0: 2896.6. Samples: 12481176. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-07-05 17:19:03,942][04005] Avg episode reward: [(0, '53.389')] +[2024-07-05 17:19:06,065][04594] Updated weights for policy 0, policy_version 17084 (0.0012) +[2024-07-05 17:19:08,538][04581] Stopping Batcher_0... +[2024-07-05 17:19:08,538][04581] Loop batcher_evt_loop terminating... +[2024-07-05 17:19:08,538][04005] Component Batcher_0 stopped! +[2024-07-05 17:19:08,539][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000017091_70004736.pth... +[2024-07-05 17:19:08,551][04598] Stopping RolloutWorker_w4... +[2024-07-05 17:19:08,551][04602] Stopping RolloutWorker_w6... +[2024-07-05 17:19:08,551][04595] Stopping RolloutWorker_w0... +[2024-07-05 17:19:08,551][04602] Loop rollout_proc6_evt_loop terminating... +[2024-07-05 17:19:08,551][04596] Stopping RolloutWorker_w2... +[2024-07-05 17:19:08,551][04598] Loop rollout_proc4_evt_loop terminating... +[2024-07-05 17:19:08,551][04600] Stopping RolloutWorker_w5... +[2024-07-05 17:19:08,551][04597] Stopping RolloutWorker_w1... +[2024-07-05 17:19:08,551][04595] Loop rollout_proc0_evt_loop terminating... +[2024-07-05 17:19:08,552][04596] Loop rollout_proc2_evt_loop terminating... +[2024-07-05 17:19:08,552][04597] Loop rollout_proc1_evt_loop terminating... +[2024-07-05 17:19:08,551][04005] Component RolloutWorker_w4 stopped! +[2024-07-05 17:19:08,552][04600] Loop rollout_proc5_evt_loop terminating... +[2024-07-05 17:19:08,552][04599] Stopping RolloutWorker_w3... +[2024-07-05 17:19:08,552][04599] Loop rollout_proc3_evt_loop terminating... +[2024-07-05 17:19:08,552][04601] Stopping RolloutWorker_w7... +[2024-07-05 17:19:08,553][04601] Loop rollout_proc7_evt_loop terminating... +[2024-07-05 17:19:08,552][04005] Component RolloutWorker_w6 stopped! +[2024-07-05 17:19:08,554][04005] Component RolloutWorker_w0 stopped! +[2024-07-05 17:19:08,555][04005] Component RolloutWorker_w2 stopped! +[2024-07-05 17:19:08,556][04005] Component RolloutWorker_w5 stopped! +[2024-07-05 17:19:08,556][04005] Component RolloutWorker_w1 stopped! +[2024-07-05 17:19:08,557][04005] Component RolloutWorker_w3 stopped! +[2024-07-05 17:19:08,558][04005] Component RolloutWorker_w7 stopped! +[2024-07-05 17:19:08,570][04594] Weights refcount: 2 0 +[2024-07-05 17:19:08,572][04594] Stopping InferenceWorker_p0-w0... +[2024-07-05 17:19:08,572][04594] Loop inference_proc0-0_evt_loop terminating... +[2024-07-05 17:19:08,572][04005] Component InferenceWorker_p0-w0 stopped! +[2024-07-05 17:19:08,630][04581] Removing /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000016654_68214784.pth +[2024-07-05 17:19:08,642][04581] Saving /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000017091_70004736.pth... +[2024-07-05 17:19:08,756][04581] Stopping LearnerWorker_p0... +[2024-07-05 17:19:08,756][04581] Loop learner_proc0_evt_loop terminating... +[2024-07-05 17:19:08,756][04005] Component LearnerWorker_p0 stopped! +[2024-07-05 17:19:08,758][04005] Waiting for process learner_proc0 to stop... +[2024-07-05 17:19:09,723][04005] Waiting for process inference_proc0-0 to join... +[2024-07-05 17:19:09,724][04005] Waiting for process rollout_proc0 to join... +[2024-07-05 17:19:09,725][04005] Waiting for process rollout_proc1 to join... +[2024-07-05 17:19:09,726][04005] Waiting for process rollout_proc2 to join... +[2024-07-05 17:19:09,726][04005] Waiting for process rollout_proc3 to join... +[2024-07-05 17:19:09,726][04005] Waiting for process rollout_proc4 to join... +[2024-07-05 17:19:09,727][04005] Waiting for process rollout_proc5 to join... +[2024-07-05 17:19:09,727][04005] Waiting for process rollout_proc6 to join... +[2024-07-05 17:19:09,728][04005] Waiting for process rollout_proc7 to join... +[2024-07-05 17:19:09,728][04005] Batcher 0 profile tree view: +batching: 78.0789, releasing_batches: 0.2794 +[2024-07-05 17:19:09,728][04005] InferenceWorker_p0-w0 profile tree view: wait_policy: 0.0000 - wait_policy_total: 152.4061 -update_model: 47.2008 - weight_update: 0.0007 -one_step: 0.0025 - handle_policy_step: 2893.1792 - deserialize: 233.1637, stack: 16.4005, obs_to_device_normalize: 684.9469, forward: 1336.5257, send_messages: 142.3971 - prepare_outputs: 380.2794 - to_cpu: 235.5221 -[2024-07-05 15:47:35,055][03423] Learner 0 profile tree view: -misc: 0.0765, prepare_batch: 301.8805 -train: 709.7991 - epoch_init: 0.0606, minibatch_init: 0.0816, losses_postprocess: 3.1675, kl_divergence: 3.7417, after_optimizer: 2.9246 - calculate_losses: 277.5781 - losses_init: 0.0306, forward_head: 10.9008, bptt_initial: 226.0494, tail: 8.6468, advantages_returns: 2.4628, losses: 12.7700 - bptt: 13.7327 - bptt_forward_core: 13.0757 - update: 416.4638 - clip: 11.6032 -[2024-07-05 15:47:35,055][03423] RolloutWorker_w0 profile tree view: -wait_for_trajectories: 1.4183, enqueue_policy_requests: 94.3193, env_step: 1551.9875, overhead: 157.4841, complete_rollouts: 3.3137 -save_policy_outputs: 119.0424 - split_output_tensors: 55.2342 -[2024-07-05 15:47:35,055][03423] RolloutWorker_w15 profile tree view: -wait_for_trajectories: 1.4423, enqueue_policy_requests: 96.5808, env_step: 1554.5923, overhead: 156.0953, complete_rollouts: 3.6541 -save_policy_outputs: 117.3319 - split_output_tensors: 55.3652 -[2024-07-05 15:47:35,056][03423] Loop Runner_EvtLoop terminating... -[2024-07-05 15:47:35,057][03423] Runner profile tree view: -main_loop: 3202.8765 -[2024-07-05 15:47:35,057][03423] Collected {0: 600014848}, FPS: 46834.1 -[2024-07-05 15:48:15,708][03423] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 15:48:15,709][03423] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 15:48:15,709][03423] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 15:48:15,710][03423] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 15:48:15,710][03423] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 15:48:15,710][03423] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 15:48:15,711][03423] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 15:48:15,711][03423] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 15:48:15,711][03423] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-07-05 15:48:15,711][03423] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-07-05 15:48:15,712][03423] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 15:48:15,712][03423] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 15:48:15,712][03423] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 15:48:15,713][03423] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 15:48:15,713][03423] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 15:48:15,746][03423] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-07-05 15:48:15,749][03423] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 15:48:15,754][03423] RunningMeanStd input shape: (1,) -[2024-07-05 15:48:15,786][03423] ConvEncoder: input_channels=3 -[2024-07-05 15:48:15,924][03423] Conv encoder output size: 512 -[2024-07-05 15:48:15,924][03423] Policy head output size: 512 -[2024-07-05 15:48:17,649][03423] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000075686_600014848.pth... -[2024-07-05 15:48:18,496][03423] Num frames 100... -[2024-07-05 15:48:18,562][03423] Num frames 200... -[2024-07-05 15:48:18,626][03423] Num frames 300... -[2024-07-05 15:48:18,690][03423] Num frames 400... -[2024-07-05 15:48:18,757][03423] Num frames 500... -[2024-07-05 15:48:18,819][03423] Num frames 600... -[2024-07-05 15:48:18,882][03423] Num frames 700... -[2024-07-05 15:48:18,949][03423] Num frames 800... -[2024-07-05 15:48:19,013][03423] Num frames 900... -[2024-07-05 15:48:19,075][03423] Num frames 1000... -[2024-07-05 15:48:19,139][03423] Num frames 1100... -[2024-07-05 15:48:19,203][03423] Num frames 1200... -[2024-07-05 15:48:19,267][03423] Num frames 1300... -[2024-07-05 15:48:19,331][03423] Num frames 1400... -[2024-07-05 15:48:19,394][03423] Num frames 1500... -[2024-07-05 15:48:19,457][03423] Num frames 1600... -[2024-07-05 15:48:19,522][03423] Num frames 1700... -[2024-07-05 15:48:19,585][03423] Num frames 1800... -[2024-07-05 15:48:19,650][03423] Num frames 1900... -[2024-07-05 15:48:19,711][03423] Num frames 2000... -[2024-07-05 15:48:19,774][03423] Num frames 2100... -[2024-07-05 15:48:19,826][03423] Avg episode rewards: #0: 60.999, true rewards: #0: 21.000 -[2024-07-05 15:48:19,827][03423] Avg episode reward: 60.999, avg true_objective: 21.000 -[2024-07-05 15:48:19,891][03423] Num frames 2200... -[2024-07-05 15:48:19,953][03423] Num frames 2300... -[2024-07-05 15:48:20,013][03423] Num frames 2400... -[2024-07-05 15:48:20,074][03423] Num frames 2500... -[2024-07-05 15:48:20,135][03423] Num frames 2600... -[2024-07-05 15:48:20,198][03423] Num frames 2700... -[2024-07-05 15:48:20,258][03423] Num frames 2800... -[2024-07-05 15:48:20,319][03423] Num frames 2900... -[2024-07-05 15:48:20,381][03423] Num frames 3000... -[2024-07-05 15:48:20,444][03423] Num frames 3100... -[2024-07-05 15:48:20,506][03423] Num frames 3200... -[2024-07-05 15:48:20,567][03423] Num frames 3300... -[2024-07-05 15:48:20,629][03423] Num frames 3400... -[2024-07-05 15:48:20,693][03423] Num frames 3500... -[2024-07-05 15:48:20,756][03423] Num frames 3600... -[2024-07-05 15:48:20,819][03423] Num frames 3700... -[2024-07-05 15:48:20,882][03423] Num frames 3800... -[2024-07-05 15:48:20,945][03423] Num frames 3900... -[2024-07-05 15:48:21,008][03423] Num frames 4000... -[2024-07-05 15:48:21,071][03423] Num frames 4100... -[2024-07-05 15:48:21,136][03423] Num frames 4200... -[2024-07-05 15:48:21,187][03423] Avg episode rewards: #0: 60.499, true rewards: #0: 21.000 -[2024-07-05 15:48:21,188][03423] Avg episode reward: 60.499, avg true_objective: 21.000 -[2024-07-05 15:48:21,255][03423] Num frames 4300... -[2024-07-05 15:48:21,317][03423] Num frames 4400... -[2024-07-05 15:48:21,380][03423] Num frames 4500... -[2024-07-05 15:48:21,442][03423] Num frames 4600... -[2024-07-05 15:48:21,505][03423] Num frames 4700... -[2024-07-05 15:48:21,567][03423] Num frames 4800... -[2024-07-05 15:48:21,631][03423] Num frames 4900... -[2024-07-05 15:48:21,694][03423] Num frames 5000... -[2024-07-05 15:48:21,755][03423] Num frames 5100... -[2024-07-05 15:48:21,818][03423] Num frames 5200... -[2024-07-05 15:48:21,883][03423] Num frames 5300... -[2024-07-05 15:48:21,944][03423] Num frames 5400... -[2024-07-05 15:48:22,009][03423] Num frames 5500... -[2024-07-05 15:48:22,073][03423] Num frames 5600... -[2024-07-05 15:48:22,133][03423] Num frames 5700... -[2024-07-05 15:48:22,212][03423] Num frames 5800... -[2024-07-05 15:48:22,273][03423] Num frames 5900... -[2024-07-05 15:48:22,334][03423] Num frames 6000... -[2024-07-05 15:48:22,397][03423] Num frames 6100... -[2024-07-05 15:48:22,462][03423] Avg episode rewards: #0: 60.065, true rewards: #0: 20.400 -[2024-07-05 15:48:22,463][03423] Avg episode reward: 60.065, avg true_objective: 20.400 -[2024-07-05 15:48:22,517][03423] Num frames 6200... -[2024-07-05 15:48:22,578][03423] Num frames 6300... -[2024-07-05 15:48:22,640][03423] Num frames 6400... -[2024-07-05 15:48:22,702][03423] Num frames 6500... -[2024-07-05 15:48:22,766][03423] Num frames 6600... -[2024-07-05 15:48:22,828][03423] Num frames 6700... -[2024-07-05 15:48:22,892][03423] Num frames 6800... -[2024-07-05 15:48:22,953][03423] Num frames 6900... -[2024-07-05 15:48:23,014][03423] Num frames 7000... -[2024-07-05 15:48:23,078][03423] Num frames 7100... -[2024-07-05 15:48:23,141][03423] Num frames 7200... -[2024-07-05 15:48:23,205][03423] Num frames 7300... -[2024-07-05 15:48:23,269][03423] Num frames 7400... -[2024-07-05 15:48:23,329][03423] Num frames 7500... -[2024-07-05 15:48:23,394][03423] Num frames 7600... -[2024-07-05 15:48:23,457][03423] Num frames 7700... -[2024-07-05 15:48:23,523][03423] Num frames 7800... -[2024-07-05 15:48:23,589][03423] Num frames 7900... -[2024-07-05 15:48:23,652][03423] Num frames 8000... -[2024-07-05 15:48:23,714][03423] Num frames 8100... -[2024-07-05 15:48:23,778][03423] Num frames 8200... -[2024-07-05 15:48:23,844][03423] Avg episode rewards: #0: 60.549, true rewards: #0: 20.550 -[2024-07-05 15:48:23,845][03423] Avg episode reward: 60.549, avg true_objective: 20.550 -[2024-07-05 15:48:23,899][03423] Num frames 8300... -[2024-07-05 15:48:23,958][03423] Num frames 8400... -[2024-07-05 15:48:24,021][03423] Num frames 8500... -[2024-07-05 15:48:24,084][03423] Num frames 8600... -[2024-07-05 15:48:24,143][03423] Num frames 8700... -[2024-07-05 15:48:24,206][03423] Num frames 8800... -[2024-07-05 15:48:24,269][03423] Num frames 8900... -[2024-07-05 15:48:24,332][03423] Num frames 9000... -[2024-07-05 15:48:24,396][03423] Num frames 9100... -[2024-07-05 15:48:24,461][03423] Num frames 9200... -[2024-07-05 15:48:24,523][03423] Num frames 9300... -[2024-07-05 15:48:24,594][03423] Num frames 9400... -[2024-07-05 15:48:24,657][03423] Num frames 9500... -[2024-07-05 15:48:24,718][03423] Num frames 9600... -[2024-07-05 15:48:24,780][03423] Num frames 9700... -[2024-07-05 15:48:24,842][03423] Num frames 9800... -[2024-07-05 15:48:24,903][03423] Num frames 9900... -[2024-07-05 15:48:24,966][03423] Num frames 10000... -[2024-07-05 15:48:25,030][03423] Num frames 10100... -[2024-07-05 15:48:25,094][03423] Num frames 10200... -[2024-07-05 15:48:25,158][03423] Num frames 10300... -[2024-07-05 15:48:25,225][03423] Avg episode rewards: #0: 60.439, true rewards: #0: 20.640 -[2024-07-05 15:48:25,226][03423] Avg episode reward: 60.439, avg true_objective: 20.640 -[2024-07-05 15:48:25,280][03423] Num frames 10400... -[2024-07-05 15:48:25,341][03423] Num frames 10500... -[2024-07-05 15:48:25,413][03423] Num frames 10600... -[2024-07-05 15:48:25,483][03423] Num frames 10700... -[2024-07-05 15:48:25,546][03423] Num frames 10800... -[2024-07-05 15:48:25,609][03423] Num frames 10900... -[2024-07-05 15:48:25,675][03423] Num frames 11000... -[2024-07-05 15:48:25,739][03423] Num frames 11100... -[2024-07-05 15:48:25,801][03423] Num frames 11200... -[2024-07-05 15:48:25,867][03423] Num frames 11300... -[2024-07-05 15:48:25,931][03423] Num frames 11400... -[2024-07-05 15:48:25,992][03423] Num frames 11500... -[2024-07-05 15:48:26,056][03423] Num frames 11600... -[2024-07-05 15:48:26,124][03423] Num frames 11700... -[2024-07-05 15:48:26,187][03423] Num frames 11800... -[2024-07-05 15:48:26,247][03423] Num frames 11900... -[2024-07-05 15:48:26,311][03423] Num frames 12000... -[2024-07-05 15:48:26,375][03423] Num frames 12100... -[2024-07-05 15:48:26,438][03423] Num frames 12200... -[2024-07-05 15:48:26,501][03423] Num frames 12300... -[2024-07-05 15:48:26,564][03423] Num frames 12400... -[2024-07-05 15:48:26,630][03423] Avg episode rewards: #0: 60.865, true rewards: #0: 20.700 -[2024-07-05 15:48:26,632][03423] Avg episode reward: 60.865, avg true_objective: 20.700 -[2024-07-05 15:48:26,689][03423] Num frames 12500... -[2024-07-05 15:48:26,750][03423] Num frames 12600... -[2024-07-05 15:48:26,812][03423] Num frames 12700... -[2024-07-05 15:48:26,884][03423] Num frames 12800... -[2024-07-05 15:48:26,949][03423] Num frames 12900... -[2024-07-05 15:48:27,011][03423] Num frames 13000... -[2024-07-05 15:48:27,121][03423] Avg episode rewards: #0: 54.130, true rewards: #0: 18.703 -[2024-07-05 15:48:27,122][03423] Avg episode reward: 54.130, avg true_objective: 18.703 -[2024-07-05 15:48:27,131][03423] Num frames 13100... -[2024-07-05 15:48:27,195][03423] Num frames 13200... -[2024-07-05 15:48:27,258][03423] Num frames 13300... -[2024-07-05 15:48:27,321][03423] Num frames 13400... -[2024-07-05 15:48:27,385][03423] Num frames 13500... -[2024-07-05 15:48:27,446][03423] Num frames 13600... -[2024-07-05 15:48:27,518][03423] Num frames 13700... -[2024-07-05 15:48:27,581][03423] Num frames 13800... -[2024-07-05 15:48:27,643][03423] Num frames 13900... -[2024-07-05 15:48:27,708][03423] Num frames 14000... -[2024-07-05 15:48:27,769][03423] Num frames 14100... -[2024-07-05 15:48:27,834][03423] Num frames 14200... -[2024-07-05 15:48:27,896][03423] Num frames 14300... -[2024-07-05 15:48:27,958][03423] Num frames 14400... -[2024-07-05 15:48:28,022][03423] Num frames 14500... -[2024-07-05 15:48:28,085][03423] Num frames 14600... -[2024-07-05 15:48:28,153][03423] Num frames 14700... -[2024-07-05 15:48:28,218][03423] Num frames 14800... -[2024-07-05 15:48:28,281][03423] Num frames 14900... -[2024-07-05 15:48:28,343][03423] Num frames 15000... -[2024-07-05 15:48:28,408][03423] Num frames 15100... -[2024-07-05 15:48:28,521][03423] Avg episode rewards: #0: 54.864, true rewards: #0: 18.990 -[2024-07-05 15:48:28,522][03423] Avg episode reward: 54.864, avg true_objective: 18.990 -[2024-07-05 15:48:28,532][03423] Num frames 15200... -[2024-07-05 15:48:28,607][03423] Num frames 15300... -[2024-07-05 15:48:28,668][03423] Num frames 15400... -[2024-07-05 15:48:28,729][03423] Num frames 15500... -[2024-07-05 15:48:28,791][03423] Num frames 15600... -[2024-07-05 15:48:28,849][03423] Num frames 15700... -[2024-07-05 15:48:28,911][03423] Num frames 15800... -[2024-07-05 15:48:28,973][03423] Num frames 15900... -[2024-07-05 15:48:29,035][03423] Num frames 16000... -[2024-07-05 15:48:29,095][03423] Num frames 16100... -[2024-07-05 15:48:29,155][03423] Num frames 16200... -[2024-07-05 15:48:29,220][03423] Num frames 16300... -[2024-07-05 15:48:29,282][03423] Num frames 16400... -[2024-07-05 15:48:29,343][03423] Num frames 16500... -[2024-07-05 15:48:29,404][03423] Num frames 16600... -[2024-07-05 15:48:29,465][03423] Num frames 16700... -[2024-07-05 15:48:29,526][03423] Num frames 16800... -[2024-07-05 15:48:29,591][03423] Num frames 16900... -[2024-07-05 15:48:29,653][03423] Num frames 17000... -[2024-07-05 15:48:29,715][03423] Num frames 17100... -[2024-07-05 15:48:29,777][03423] Num frames 17200... -[2024-07-05 15:48:29,887][03423] Avg episode rewards: #0: 55.768, true rewards: #0: 19.213 -[2024-07-05 15:48:29,888][03423] Avg episode reward: 55.768, avg true_objective: 19.213 -[2024-07-05 15:48:29,898][03423] Num frames 17300... -[2024-07-05 15:48:29,961][03423] Num frames 17400... -[2024-07-05 15:48:30,022][03423] Num frames 17500... -[2024-07-05 15:48:30,082][03423] Num frames 17600... -[2024-07-05 15:48:30,140][03423] Num frames 17700... -[2024-07-05 15:48:30,200][03423] Num frames 17800... -[2024-07-05 15:48:30,259][03423] Num frames 17900... -[2024-07-05 15:48:30,322][03423] Num frames 18000... -[2024-07-05 15:48:30,383][03423] Num frames 18100... -[2024-07-05 15:48:30,444][03423] Num frames 18200... -[2024-07-05 15:48:30,503][03423] Num frames 18300... -[2024-07-05 15:48:30,564][03423] Num frames 18400... -[2024-07-05 15:48:30,623][03423] Num frames 18500... -[2024-07-05 15:48:30,684][03423] Num frames 18600... -[2024-07-05 15:48:30,744][03423] Num frames 18700... -[2024-07-05 15:48:30,807][03423] Num frames 18800... -[2024-07-05 15:48:30,868][03423] Num frames 18900... -[2024-07-05 15:48:30,929][03423] Num frames 19000... -[2024-07-05 15:48:30,991][03423] Num frames 19100... -[2024-07-05 15:48:31,053][03423] Num frames 19200... -[2024-07-05 15:48:31,115][03423] Num frames 19300... -[2024-07-05 15:48:31,222][03423] Avg episode rewards: #0: 55.891, true rewards: #0: 19.392 -[2024-07-05 15:48:31,223][03423] Avg episode reward: 55.891, avg true_objective: 19.392 -[2024-07-05 15:48:51,264][03423] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! -[2024-07-05 15:50:22,629][03423] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/config.json -[2024-07-05 15:50:22,630][03423] Overriding arg 'num_workers' with value 1 passed from command line -[2024-07-05 15:50:22,630][03423] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-07-05 15:50:22,630][03423] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-07-05 15:50:22,630][03423] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-07-05 15:50:22,631][03423] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-07-05 15:50:22,631][03423] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! -[2024-07-05 15:50:22,631][03423] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-07-05 15:50:22,631][03423] Adding new argument 'push_to_hub'=True that is not in the saved config file! -[2024-07-05 15:50:22,632][03423] Adding new argument 'hf_repository'='ra9hu/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! -[2024-07-05 15:50:22,632][03423] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-07-05 15:50:22,632][03423] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-07-05 15:50:22,632][03423] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-07-05 15:50:22,633][03423] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-07-05 15:50:22,633][03423] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-07-05 15:50:22,646][03423] RunningMeanStd input shape: (3, 72, 128) -[2024-07-05 15:50:22,647][03423] RunningMeanStd input shape: (1,) -[2024-07-05 15:50:22,655][03423] ConvEncoder: input_channels=3 -[2024-07-05 15:50:22,675][03423] Conv encoder output size: 512 -[2024-07-05 15:50:22,676][03423] Policy head output size: 512 -[2024-07-05 15:50:22,693][03423] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/checkpoint_p0/checkpoint_000075686_600014848.pth... -[2024-07-05 15:50:23,279][03423] Num frames 100... -[2024-07-05 15:50:23,342][03423] Num frames 200... -[2024-07-05 15:50:23,406][03423] Num frames 300... -[2024-07-05 15:50:23,466][03423] Num frames 400... -[2024-07-05 15:50:23,526][03423] Num frames 500... -[2024-07-05 15:50:23,586][03423] Num frames 600... -[2024-07-05 15:50:23,648][03423] Num frames 700... -[2024-07-05 15:50:23,708][03423] Num frames 800... -[2024-07-05 15:50:23,770][03423] Num frames 900... -[2024-07-05 15:50:23,831][03423] Num frames 1000... -[2024-07-05 15:50:23,895][03423] Num frames 1100... -[2024-07-05 15:50:23,962][03423] Num frames 1200... -[2024-07-05 15:50:24,032][03423] Num frames 1300... -[2024-07-05 15:50:24,094][03423] Num frames 1400... -[2024-07-05 15:50:24,158][03423] Num frames 1500... -[2024-07-05 15:50:24,222][03423] Num frames 1600... -[2024-07-05 15:50:24,285][03423] Num frames 1700... -[2024-07-05 15:50:24,350][03423] Num frames 1800... -[2024-07-05 15:50:24,412][03423] Num frames 1900... -[2024-07-05 15:50:24,476][03423] Num frames 2000... -[2024-07-05 15:50:24,541][03423] Num frames 2100... -[2024-07-05 15:50:24,593][03423] Avg episode rewards: #0: 59.999, true rewards: #0: 21.000 -[2024-07-05 15:50:24,594][03423] Avg episode reward: 59.999, avg true_objective: 21.000 -[2024-07-05 15:50:24,670][03423] Num frames 2200... -[2024-07-05 15:50:24,733][03423] Num frames 2300... -[2024-07-05 15:50:24,798][03423] Num frames 2400... -[2024-07-05 15:50:24,864][03423] Num frames 2500... -[2024-07-05 15:50:24,929][03423] Num frames 2600... -[2024-07-05 15:50:24,994][03423] Num frames 2700... -[2024-07-05 15:50:25,056][03423] Num frames 2800... -[2024-07-05 15:50:25,119][03423] Num frames 2900... -[2024-07-05 15:50:25,181][03423] Num frames 3000... -[2024-07-05 15:50:25,245][03423] Num frames 3100... -[2024-07-05 15:50:25,307][03423] Num frames 3200... -[2024-07-05 15:50:25,369][03423] Num frames 3300... -[2024-07-05 15:50:25,430][03423] Num frames 3400... -[2024-07-05 15:50:25,492][03423] Num frames 3500... -[2024-07-05 15:50:25,554][03423] Num frames 3600... -[2024-07-05 15:50:25,617][03423] Num frames 3700... -[2024-07-05 15:50:25,682][03423] Num frames 3800... -[2024-07-05 15:50:25,742][03423] Num frames 3900... -[2024-07-05 15:50:25,803][03423] Num frames 4000... -[2024-07-05 15:50:25,865][03423] Num frames 4100... -[2024-07-05 15:50:25,931][03423] Num frames 4200... -[2024-07-05 15:50:25,983][03423] Avg episode rewards: #0: 59.999, true rewards: #0: 21.000 -[2024-07-05 15:50:25,985][03423] Avg episode reward: 59.999, avg true_objective: 21.000 -[2024-07-05 15:50:26,051][03423] Num frames 4300... -[2024-07-05 15:50:26,114][03423] Num frames 4400... -[2024-07-05 15:50:26,176][03423] Num frames 4500... -[2024-07-05 15:50:26,236][03423] Num frames 4600... -[2024-07-05 15:50:26,306][03423] Num frames 4700... -[2024-07-05 15:50:26,381][03423] Num frames 4800... -[2024-07-05 15:50:26,463][03423] Num frames 4900... -[2024-07-05 15:50:26,546][03423] Num frames 5000... -[2024-07-05 15:50:26,609][03423] Num frames 5100... -[2024-07-05 15:50:26,672][03423] Num frames 5200... -[2024-07-05 15:50:26,733][03423] Num frames 5300... -[2024-07-05 15:50:26,796][03423] Num frames 5400... -[2024-07-05 15:50:26,857][03423] Num frames 5500... -[2024-07-05 15:50:26,917][03423] Num frames 5600... -[2024-07-05 15:50:26,980][03423] Num frames 5700... -[2024-07-05 15:50:27,041][03423] Num frames 5800... -[2024-07-05 15:50:27,106][03423] Num frames 5900... -[2024-07-05 15:50:27,168][03423] Num frames 6000... -[2024-07-05 15:50:27,228][03423] Num frames 6100... -[2024-07-05 15:50:27,291][03423] Num frames 6200... -[2024-07-05 15:50:27,355][03423] Num frames 6300... -[2024-07-05 15:50:27,406][03423] Avg episode rewards: #0: 60.332, true rewards: #0: 21.000 -[2024-07-05 15:50:27,407][03423] Avg episode reward: 60.332, avg true_objective: 21.000 -[2024-07-05 15:50:27,477][03423] Num frames 6400... -[2024-07-05 15:50:27,551][03423] Num frames 6500... -[2024-07-05 15:50:27,619][03423] Num frames 6600... -[2024-07-05 15:50:27,689][03423] Num frames 6700... -[2024-07-05 15:50:27,763][03423] Num frames 6800... -[2024-07-05 15:50:27,834][03423] Num frames 6900... -[2024-07-05 15:50:27,900][03423] Num frames 7000... -[2024-07-05 15:50:27,965][03423] Num frames 7100... -[2024-07-05 15:50:28,026][03423] Num frames 7200... -[2024-07-05 15:50:28,087][03423] Num frames 7300... -[2024-07-05 15:50:28,149][03423] Num frames 7400... -[2024-07-05 15:50:28,213][03423] Num frames 7500... -[2024-07-05 15:50:28,276][03423] Num frames 7600... -[2024-07-05 15:50:28,357][03423] Avg episode rewards: #0: 54.104, true rewards: #0: 19.105 -[2024-07-05 15:50:28,358][03423] Avg episode reward: 54.104, avg true_objective: 19.105 -[2024-07-05 15:50:28,396][03423] Num frames 7700... -[2024-07-05 15:50:28,462][03423] Num frames 7800... -[2024-07-05 15:50:28,525][03423] Num frames 7900... -[2024-07-05 15:50:28,586][03423] Num frames 8000... -[2024-07-05 15:50:28,648][03423] Num frames 8100... -[2024-07-05 15:50:28,714][03423] Num frames 8200... -[2024-07-05 15:50:28,785][03423] Num frames 8300... -[2024-07-05 15:50:28,871][03423] Num frames 8400... -[2024-07-05 15:50:28,942][03423] Num frames 8500... -[2024-07-05 15:50:29,009][03423] Num frames 8600... -[2024-07-05 15:50:29,074][03423] Num frames 8700... -[2024-07-05 15:50:29,135][03423] Num frames 8800... -[2024-07-05 15:50:29,229][03423] Avg episode rewards: #0: 49.715, true rewards: #0: 17.716 -[2024-07-05 15:50:29,230][03423] Avg episode reward: 49.715, avg true_objective: 17.716 -[2024-07-05 15:50:29,266][03423] Num frames 8900... -[2024-07-05 15:50:29,327][03423] Num frames 9000... -[2024-07-05 15:50:29,391][03423] Num frames 9100... -[2024-07-05 15:50:29,451][03423] Num frames 9200... -[2024-07-05 15:50:29,512][03423] Num frames 9300... -[2024-07-05 15:50:29,575][03423] Num frames 9400... -[2024-07-05 15:50:29,638][03423] Num frames 9500... -[2024-07-05 15:50:29,700][03423] Num frames 9600... -[2024-07-05 15:50:29,762][03423] Num frames 9700... -[2024-07-05 15:50:29,822][03423] Num frames 9800... -[2024-07-05 15:50:29,883][03423] Num frames 9900... -[2024-07-05 15:50:29,944][03423] Num frames 10000... -[2024-07-05 15:50:30,014][03423] Num frames 10100... -[2024-07-05 15:50:30,078][03423] Num frames 10200... -[2024-07-05 15:50:30,146][03423] Num frames 10300... -[2024-07-05 15:50:30,206][03423] Num frames 10400... -[2024-07-05 15:50:30,265][03423] Num frames 10500... -[2024-07-05 15:50:30,328][03423] Num frames 10600... -[2024-07-05 15:50:30,389][03423] Num frames 10700... -[2024-07-05 15:50:30,452][03423] Num frames 10800... -[2024-07-05 15:50:30,517][03423] Num frames 10900... -[2024-07-05 15:50:30,607][03423] Avg episode rewards: #0: 51.763, true rewards: #0: 18.263 -[2024-07-05 15:50:30,608][03423] Avg episode reward: 51.763, avg true_objective: 18.263 -[2024-07-05 15:50:30,639][03423] Num frames 11000... -[2024-07-05 15:50:30,699][03423] Num frames 11100... -[2024-07-05 15:50:30,760][03423] Num frames 11200... -[2024-07-05 15:50:30,820][03423] Num frames 11300... -[2024-07-05 15:50:30,880][03423] Num frames 11400... -[2024-07-05 15:50:30,940][03423] Num frames 11500... -[2024-07-05 15:50:31,000][03423] Num frames 11600... -[2024-07-05 15:50:31,063][03423] Num frames 11700... -[2024-07-05 15:50:31,128][03423] Num frames 11800... -[2024-07-05 15:50:31,191][03423] Num frames 11900... -[2024-07-05 15:50:31,255][03423] Num frames 12000... -[2024-07-05 15:50:31,318][03423] Num frames 12100... -[2024-07-05 15:50:31,381][03423] Num frames 12200... -[2024-07-05 15:50:31,446][03423] Num frames 12300... -[2024-07-05 15:50:31,510][03423] Num frames 12400... -[2024-07-05 15:50:31,571][03423] Num frames 12500... -[2024-07-05 15:50:31,633][03423] Num frames 12600... -[2024-07-05 15:50:31,699][03423] Num frames 12700... -[2024-07-05 15:50:31,761][03423] Num frames 12800... -[2024-07-05 15:50:31,822][03423] Num frames 12900... -[2024-07-05 15:50:31,885][03423] Num frames 13000... -[2024-07-05 15:50:31,975][03423] Avg episode rewards: #0: 53.225, true rewards: #0: 18.654 -[2024-07-05 15:50:31,976][03423] Avg episode reward: 53.225, avg true_objective: 18.654 -[2024-07-05 15:50:32,010][03423] Num frames 13100... -[2024-07-05 15:50:32,072][03423] Num frames 13200... -[2024-07-05 15:50:32,133][03423] Num frames 13300... -[2024-07-05 15:50:32,197][03423] Num frames 13400... -[2024-07-05 15:50:32,259][03423] Num frames 13500... -[2024-07-05 15:50:32,321][03423] Num frames 13600... -[2024-07-05 15:50:32,384][03423] Num frames 13700... -[2024-07-05 15:50:32,447][03423] Num frames 13800... -[2024-07-05 15:50:32,508][03423] Num frames 13900... -[2024-07-05 15:50:32,570][03423] Num frames 14000... -[2024-07-05 15:50:32,632][03423] Num frames 14100... -[2024-07-05 15:50:32,695][03423] Num frames 14200... -[2024-07-05 15:50:32,755][03423] Num frames 14300... -[2024-07-05 15:50:32,817][03423] Num frames 14400... -[2024-07-05 15:50:32,882][03423] Num frames 14500... -[2024-07-05 15:50:32,944][03423] Num frames 14600... -[2024-07-05 15:50:33,005][03423] Num frames 14700... -[2024-07-05 15:50:33,069][03423] Num frames 14800... -[2024-07-05 15:50:33,132][03423] Num frames 14900... -[2024-07-05 15:50:33,196][03423] Num frames 15000... -[2024-07-05 15:50:33,268][03423] Num frames 15100... -[2024-07-05 15:50:33,362][03423] Avg episode rewards: #0: 53.697, true rewards: #0: 18.948 -[2024-07-05 15:50:33,363][03423] Avg episode reward: 53.697, avg true_objective: 18.948 -[2024-07-05 15:50:33,393][03423] Num frames 15200... -[2024-07-05 15:50:33,453][03423] Num frames 15300... -[2024-07-05 15:50:33,513][03423] Num frames 15400... -[2024-07-05 15:50:33,577][03423] Num frames 15500... -[2024-07-05 15:50:33,641][03423] Num frames 15600... -[2024-07-05 15:50:33,706][03423] Num frames 15700... -[2024-07-05 15:50:33,767][03423] Num frames 15800... -[2024-07-05 15:50:33,832][03423] Num frames 15900... -[2024-07-05 15:50:33,899][03423] Num frames 16000... -[2024-07-05 15:50:33,966][03423] Num frames 16100... -[2024-07-05 15:50:34,041][03423] Num frames 16200... -[2024-07-05 15:50:34,133][03423] Num frames 16300... -[2024-07-05 15:50:34,207][03423] Num frames 16400... -[2024-07-05 15:50:34,268][03423] Num frames 16500... -[2024-07-05 15:50:34,330][03423] Num frames 16600... -[2024-07-05 15:50:34,391][03423] Num frames 16700... -[2024-07-05 15:50:34,453][03423] Num frames 16800... -[2024-07-05 15:50:34,513][03423] Num frames 16900... -[2024-07-05 15:50:34,575][03423] Num frames 17000... -[2024-07-05 15:50:34,636][03423] Num frames 17100... -[2024-07-05 15:50:34,699][03423] Num frames 17200... -[2024-07-05 15:50:34,787][03423] Avg episode rewards: #0: 54.730, true rewards: #0: 19.176 -[2024-07-05 15:50:34,789][03423] Avg episode reward: 54.730, avg true_objective: 19.176 -[2024-07-05 15:50:34,822][03423] Num frames 17300... -[2024-07-05 15:50:34,880][03423] Num frames 17400... -[2024-07-05 15:50:34,938][03423] Num frames 17500... -[2024-07-05 15:50:35,009][03423] Num frames 17600... -[2024-07-05 15:50:35,066][03423] Num frames 17700... -[2024-07-05 15:50:35,126][03423] Num frames 17800... -[2024-07-05 15:50:35,185][03423] Num frames 17900... -[2024-07-05 15:50:35,247][03423] Num frames 18000... -[2024-07-05 15:50:35,317][03423] Num frames 18100... -[2024-07-05 15:50:35,387][03423] Num frames 18200... -[2024-07-05 15:50:35,452][03423] Num frames 18300... -[2024-07-05 15:50:35,514][03423] Num frames 18400... -[2024-07-05 15:50:35,573][03423] Num frames 18500... -[2024-07-05 15:50:35,632][03423] Num frames 18600... -[2024-07-05 15:50:35,691][03423] Num frames 18700... -[2024-07-05 15:50:35,752][03423] Num frames 18800... -[2024-07-05 15:50:35,812][03423] Num frames 18900... -[2024-07-05 15:50:35,871][03423] Num frames 19000... -[2024-07-05 15:50:35,931][03423] Num frames 19100... -[2024-07-05 15:50:35,992][03423] Num frames 19200... -[2024-07-05 15:50:36,052][03423] Num frames 19300... -[2024-07-05 15:50:36,141][03423] Avg episode rewards: #0: 55.657, true rewards: #0: 19.358 -[2024-07-05 15:50:36,143][03423] Avg episode reward: 55.657, avg true_objective: 19.358 -[2024-07-05 15:50:56,055][03423] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/default_experiment/replay.mp4! + wait_policy_total: 23.0186 +update_model: 37.8160 + weight_update: 0.0011 +one_step: 0.0029 + handle_policy_step: 4669.4145 + deserialize: 76.9583, stack: 10.5236, obs_to_device_normalize: 802.4971, forward: 2767.7908, send_messages: 94.5028 + prepare_outputs: 844.8409 + to_cpu: 761.6729 +[2024-07-05 17:19:09,729][04005] Learner 0 profile tree view: +misc: 0.0551, prepare_batch: 144.0114 +train: 3409.8811 + epoch_init: 0.1600, minibatch_init: 0.1716, losses_postprocess: 4.1734, kl_divergence: 2.5807, after_optimizer: 10.7510 + calculate_losses: 1133.8747 + losses_init: 0.0841, forward_head: 30.6552, bptt_initial: 1074.4381, tail: 5.4593, advantages_returns: 1.4418, losses: 11.3527 + bptt: 8.4248 + bptt_forward_core: 8.0201 + update: 2252.6446 + clip: 10.5088 +[2024-07-05 17:19:09,729][04005] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 1.0361, enqueue_policy_requests: 64.7761, env_step: 1045.3496, overhead: 80.8314, complete_rollouts: 1.9245 +save_policy_outputs: 87.0457 + split_output_tensors: 40.6469 +[2024-07-05 17:19:09,730][04005] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 1.1689, enqueue_policy_requests: 67.3333, env_step: 994.4821, overhead: 85.3259, complete_rollouts: 2.0731 +save_policy_outputs: 87.7417 + split_output_tensors: 41.3133 +[2024-07-05 17:19:09,730][04005] Loop Runner_EvtLoop terminating... +[2024-07-05 17:19:09,731][04005] Runner profile tree view: +main_loop: 4830.5743 +[2024-07-05 17:19:09,732][04005] Collected {0: 70004736}, FPS: 10350.7 +[2024-07-05 17:19:53,356][04005] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 17:19:53,357][04005] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 17:19:53,358][04005] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 17:19:53,358][04005] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 17:19:53,359][04005] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 17:19:53,359][04005] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 17:19:53,360][04005] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 17:19:53,360][04005] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 17:19:53,361][04005] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-07-05 17:19:53,362][04005] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-07-05 17:19:53,362][04005] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 17:19:53,363][04005] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 17:19:53,364][04005] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 17:19:53,364][04005] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 17:19:53,365][04005] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 17:19:53,386][04005] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-07-05 17:19:53,388][04005] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 17:19:53,389][04005] RunningMeanStd input shape: (1,) +[2024-07-05 17:19:53,399][04005] Num input channels: 3 +[2024-07-05 17:19:53,408][04005] Convolutional layer output size: 4608 +[2024-07-05 17:19:53,423][04005] Policy head output size: 512 +[2024-07-05 17:19:55,169][04005] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000017091_70004736.pth... +[2024-07-05 17:19:55,945][04005] Num frames 100... +[2024-07-05 17:19:56,021][04005] Num frames 200... +[2024-07-05 17:19:56,094][04005] Num frames 300... +[2024-07-05 17:19:56,169][04005] Num frames 400... +[2024-07-05 17:19:56,242][04005] Num frames 500... +[2024-07-05 17:19:56,318][04005] Num frames 600... +[2024-07-05 17:19:56,390][04005] Num frames 700... +[2024-07-05 17:19:56,461][04005] Num frames 800... +[2024-07-05 17:19:56,536][04005] Num frames 900... +[2024-07-05 17:19:56,610][04005] Num frames 1000... +[2024-07-05 17:19:56,685][04005] Num frames 1100... +[2024-07-05 17:19:56,755][04005] Num frames 1200... +[2024-07-05 17:19:56,828][04005] Num frames 1300... +[2024-07-05 17:19:56,900][04005] Num frames 1400... +[2024-07-05 17:19:56,973][04005] Num frames 1500... +[2024-07-05 17:19:57,049][04005] Num frames 1600... +[2024-07-05 17:19:57,120][04005] Num frames 1700... +[2024-07-05 17:19:57,190][04005] Num frames 1800... +[2024-07-05 17:19:57,262][04005] Num frames 1900... +[2024-07-05 17:19:57,338][04005] Num frames 2000... +[2024-07-05 17:19:57,412][04005] Num frames 2100... +[2024-07-05 17:19:57,463][04005] Avg episode rewards: #0: 53.999, true rewards: #0: 21.000 +[2024-07-05 17:19:57,465][04005] Avg episode reward: 53.999, avg true_objective: 21.000 +[2024-07-05 17:19:57,565][04005] Num frames 2200... +[2024-07-05 17:19:57,663][04005] Num frames 2300... +[2024-07-05 17:19:57,743][04005] Num frames 2400... +[2024-07-05 17:19:57,819][04005] Num frames 2500... +[2024-07-05 17:19:57,891][04005] Num frames 2600... +[2024-07-05 17:19:57,964][04005] Num frames 2700... +[2024-07-05 17:19:58,036][04005] Num frames 2800... +[2024-07-05 17:19:58,107][04005] Num frames 2900... +[2024-07-05 17:19:58,190][04005] Num frames 3000... +[2024-07-05 17:19:58,265][04005] Num frames 3100... +[2024-07-05 17:19:58,341][04005] Num frames 3200... +[2024-07-05 17:19:58,418][04005] Num frames 3300... +[2024-07-05 17:19:58,493][04005] Num frames 3400... +[2024-07-05 17:19:58,569][04005] Num frames 3500... +[2024-07-05 17:19:58,644][04005] Num frames 3600... +[2024-07-05 17:19:58,716][04005] Num frames 3700... +[2024-07-05 17:19:58,791][04005] Num frames 3800... +[2024-07-05 17:19:58,868][04005] Num frames 3900... +[2024-07-05 17:19:58,945][04005] Num frames 4000... +[2024-07-05 17:19:59,022][04005] Num frames 4100... +[2024-07-05 17:19:59,098][04005] Num frames 4200... +[2024-07-05 17:19:59,149][04005] Avg episode rewards: #0: 54.999, true rewards: #0: 21.000 +[2024-07-05 17:19:59,150][04005] Avg episode reward: 54.999, avg true_objective: 21.000 +[2024-07-05 17:19:59,223][04005] Num frames 4300... +[2024-07-05 17:19:59,297][04005] Num frames 4400... +[2024-07-05 17:19:59,371][04005] Num frames 4500... +[2024-07-05 17:19:59,454][04005] Num frames 4600... +[2024-07-05 17:19:59,542][04005] Num frames 4700... +[2024-07-05 17:19:59,644][04005] Num frames 4800... +[2024-07-05 17:19:59,736][04005] Num frames 4900... +[2024-07-05 17:19:59,819][04005] Num frames 5000... +[2024-07-05 17:19:59,895][04005] Num frames 5100... +[2024-07-05 17:19:59,971][04005] Num frames 5200... +[2024-07-05 17:20:00,045][04005] Num frames 5300... +[2024-07-05 17:20:00,120][04005] Num frames 5400... +[2024-07-05 17:20:00,190][04005] Num frames 5500... +[2024-07-05 17:20:00,263][04005] Num frames 5600... +[2024-07-05 17:20:00,335][04005] Num frames 5700... +[2024-07-05 17:20:00,412][04005] Num frames 5800... +[2024-07-05 17:20:00,487][04005] Num frames 5900... +[2024-07-05 17:20:00,557][04005] Num frames 6000... +[2024-07-05 17:20:00,631][04005] Num frames 6100... +[2024-07-05 17:20:00,706][04005] Num frames 6200... +[2024-07-05 17:20:00,800][04005] Avg episode rewards: #0: 54.835, true rewards: #0: 20.837 +[2024-07-05 17:20:00,801][04005] Avg episode reward: 54.835, avg true_objective: 20.837 +[2024-07-05 17:20:00,840][04005] Num frames 6300... +[2024-07-05 17:20:00,913][04005] Num frames 6400... +[2024-07-05 17:20:00,987][04005] Num frames 6500... +[2024-07-05 17:20:01,063][04005] Num frames 6600... +[2024-07-05 17:20:01,149][04005] Num frames 6700... +[2024-07-05 17:20:01,223][04005] Num frames 6800... +[2024-07-05 17:20:01,293][04005] Num frames 6900... +[2024-07-05 17:20:01,363][04005] Num frames 7000... +[2024-07-05 17:20:01,435][04005] Num frames 7100... +[2024-07-05 17:20:01,507][04005] Num frames 7200... +[2024-07-05 17:20:01,583][04005] Num frames 7300... +[2024-07-05 17:20:01,657][04005] Num frames 7400... +[2024-07-05 17:20:01,729][04005] Num frames 7500... +[2024-07-05 17:20:01,801][04005] Num frames 7600... +[2024-07-05 17:20:01,874][04005] Num frames 7700... +[2024-07-05 17:20:01,947][04005] Num frames 7800... +[2024-07-05 17:20:02,020][04005] Num frames 7900... +[2024-07-05 17:20:02,092][04005] Num frames 8000... +[2024-07-05 17:20:02,170][04005] Num frames 8100... +[2024-07-05 17:20:02,239][04005] Num frames 8200... +[2024-07-05 17:20:02,311][04005] Num frames 8300... +[2024-07-05 17:20:02,402][04005] Avg episode rewards: #0: 55.376, true rewards: #0: 20.878 +[2024-07-05 17:20:02,403][04005] Avg episode reward: 55.376, avg true_objective: 20.878 +[2024-07-05 17:20:02,444][04005] Num frames 8400... +[2024-07-05 17:20:02,516][04005] Num frames 8500... +[2024-07-05 17:20:02,586][04005] Num frames 8600... +[2024-07-05 17:20:02,656][04005] Num frames 8700... +[2024-07-05 17:20:02,728][04005] Num frames 8800... +[2024-07-05 17:20:02,798][04005] Num frames 8900... +[2024-07-05 17:20:02,873][04005] Num frames 9000... +[2024-07-05 17:20:02,945][04005] Num frames 9100... +[2024-07-05 17:20:03,017][04005] Num frames 9200... +[2024-07-05 17:20:03,093][04005] Num frames 9300... +[2024-07-05 17:20:03,170][04005] Num frames 9400... +[2024-07-05 17:20:03,247][04005] Num frames 9500... +[2024-07-05 17:20:03,325][04005] Num frames 9600... +[2024-07-05 17:20:03,397][04005] Num frames 9700... +[2024-07-05 17:20:03,473][04005] Num frames 9800... +[2024-07-05 17:20:03,547][04005] Num frames 9900... +[2024-07-05 17:20:03,622][04005] Num frames 10000... +[2024-07-05 17:20:03,696][04005] Num frames 10100... +[2024-07-05 17:20:03,770][04005] Num frames 10200... +[2024-07-05 17:20:03,841][04005] Num frames 10300... +[2024-07-05 17:20:03,915][04005] Num frames 10400... +[2024-07-05 17:20:04,007][04005] Avg episode rewards: #0: 55.701, true rewards: #0: 20.902 +[2024-07-05 17:20:04,009][04005] Avg episode reward: 55.701, avg true_objective: 20.902 +[2024-07-05 17:20:04,047][04005] Num frames 10500... +[2024-07-05 17:20:04,120][04005] Num frames 10600... +[2024-07-05 17:20:04,193][04005] Num frames 10700... +[2024-07-05 17:20:04,266][04005] Num frames 10800... +[2024-07-05 17:20:04,339][04005] Num frames 10900... +[2024-07-05 17:20:04,409][04005] Num frames 11000... +[2024-07-05 17:20:04,482][04005] Num frames 11100... +[2024-07-05 17:20:04,555][04005] Num frames 11200... +[2024-07-05 17:20:04,626][04005] Num frames 11300... +[2024-07-05 17:20:04,712][04005] Num frames 11400... +[2024-07-05 17:20:04,786][04005] Num frames 11500... +[2024-07-05 17:20:04,857][04005] Num frames 11600... +[2024-07-05 17:20:04,931][04005] Num frames 11700... +[2024-07-05 17:20:05,003][04005] Num frames 11800... +[2024-07-05 17:20:05,075][04005] Num frames 11900... +[2024-07-05 17:20:05,148][04005] Num frames 12000... +[2024-07-05 17:20:05,220][04005] Num frames 12100... +[2024-07-05 17:20:05,292][04005] Num frames 12200... +[2024-07-05 17:20:05,365][04005] Num frames 12300... +[2024-07-05 17:20:05,442][04005] Num frames 12400... +[2024-07-05 17:20:05,517][04005] Num frames 12500... +[2024-07-05 17:20:05,609][04005] Avg episode rewards: #0: 56.084, true rewards: #0: 20.918 +[2024-07-05 17:20:05,611][04005] Avg episode reward: 56.084, avg true_objective: 20.918 +[2024-07-05 17:20:05,648][04005] Num frames 12600... +[2024-07-05 17:20:05,719][04005] Num frames 12700... +[2024-07-05 17:20:05,791][04005] Num frames 12800... +[2024-07-05 17:20:05,863][04005] Num frames 12900... +[2024-07-05 17:20:05,936][04005] Num frames 13000... +[2024-07-05 17:20:06,008][04005] Num frames 13100... +[2024-07-05 17:20:06,082][04005] Num frames 13200... +[2024-07-05 17:20:06,154][04005] Num frames 13300... +[2024-07-05 17:20:06,226][04005] Num frames 13400... +[2024-07-05 17:20:06,299][04005] Num frames 13500... +[2024-07-05 17:20:06,371][04005] Num frames 13600... +[2024-07-05 17:20:06,442][04005] Num frames 13700... +[2024-07-05 17:20:06,514][04005] Num frames 13800... +[2024-07-05 17:20:06,586][04005] Num frames 13900... +[2024-07-05 17:20:06,656][04005] Num frames 14000... +[2024-07-05 17:20:06,730][04005] Num frames 14100... +[2024-07-05 17:20:06,807][04005] Num frames 14200... +[2024-07-05 17:20:06,876][04005] Num frames 14300... +[2024-07-05 17:20:06,950][04005] Num frames 14400... +[2024-07-05 17:20:07,024][04005] Num frames 14500... +[2024-07-05 17:20:07,099][04005] Num frames 14600... +[2024-07-05 17:20:07,191][04005] Avg episode rewards: #0: 55.929, true rewards: #0: 20.930 +[2024-07-05 17:20:07,193][04005] Avg episode reward: 55.929, avg true_objective: 20.930 +[2024-07-05 17:20:07,231][04005] Num frames 14700... +[2024-07-05 17:20:07,304][04005] Num frames 14800... +[2024-07-05 17:20:07,376][04005] Num frames 14900... +[2024-07-05 17:20:07,452][04005] Num frames 15000... +[2024-07-05 17:20:07,525][04005] Num frames 15100... +[2024-07-05 17:20:07,599][04005] Num frames 15200... +[2024-07-05 17:20:07,673][04005] Num frames 15300... +[2024-07-05 17:20:07,748][04005] Num frames 15400... +[2024-07-05 17:20:07,823][04005] Num frames 15500... +[2024-07-05 17:20:07,897][04005] Num frames 15600... +[2024-07-05 17:20:07,972][04005] Num frames 15700... +[2024-07-05 17:20:08,056][04005] Num frames 15800... +[2024-07-05 17:20:08,129][04005] Num frames 15900... +[2024-07-05 17:20:08,201][04005] Num frames 16000... +[2024-07-05 17:20:08,273][04005] Num frames 16100... +[2024-07-05 17:20:08,346][04005] Num frames 16200... +[2024-07-05 17:20:08,415][04005] Num frames 16300... +[2024-07-05 17:20:08,487][04005] Num frames 16400... +[2024-07-05 17:20:08,559][04005] Num frames 16500... +[2024-07-05 17:20:08,631][04005] Num frames 16600... +[2024-07-05 17:20:08,703][04005] Num frames 16700... +[2024-07-05 17:20:08,782][04005] Avg episode rewards: #0: 56.417, true rewards: #0: 20.919 +[2024-07-05 17:20:08,783][04005] Avg episode reward: 56.417, avg true_objective: 20.919 +[2024-07-05 17:20:08,833][04005] Num frames 16800... +[2024-07-05 17:20:08,905][04005] Num frames 16900... +[2024-07-05 17:20:08,982][04005] Num frames 17000... +[2024-07-05 17:20:09,054][04005] Num frames 17100... +[2024-07-05 17:20:09,127][04005] Num frames 17200... +[2024-07-05 17:20:09,196][04005] Num frames 17300... +[2024-07-05 17:20:09,268][04005] Num frames 17400... +[2024-07-05 17:20:09,345][04005] Num frames 17500... +[2024-07-05 17:20:09,418][04005] Num frames 17600... +[2024-07-05 17:20:09,489][04005] Num frames 17700... +[2024-07-05 17:20:09,565][04005] Num frames 17800... +[2024-07-05 17:20:09,646][04005] Num frames 17900... +[2024-07-05 17:20:09,721][04005] Num frames 18000... +[2024-07-05 17:20:09,793][04005] Num frames 18100... +[2024-07-05 17:20:09,871][04005] Num frames 18200... +[2024-07-05 17:20:09,949][04005] Num frames 18300... +[2024-07-05 17:20:10,024][04005] Num frames 18400... +[2024-07-05 17:20:10,096][04005] Num frames 18500... +[2024-07-05 17:20:10,169][04005] Num frames 18600... +[2024-07-05 17:20:10,244][04005] Num frames 18700... +[2024-07-05 17:20:10,317][04005] Num frames 18800... +[2024-07-05 17:20:10,400][04005] Avg episode rewards: #0: 56.593, true rewards: #0: 20.928 +[2024-07-05 17:20:10,402][04005] Avg episode reward: 56.593, avg true_objective: 20.928 +[2024-07-05 17:20:10,454][04005] Num frames 18900... +[2024-07-05 17:20:10,524][04005] Num frames 19000... +[2024-07-05 17:20:10,598][04005] Num frames 19100... +[2024-07-05 17:20:10,671][04005] Num frames 19200... +[2024-07-05 17:20:10,745][04005] Num frames 19300... +[2024-07-05 17:20:10,819][04005] Num frames 19400... +[2024-07-05 17:20:10,894][04005] Num frames 19500... +[2024-07-05 17:20:10,968][04005] Num frames 19600... +[2024-07-05 17:20:11,043][04005] Num frames 19700... +[2024-07-05 17:20:11,122][04005] Num frames 19800... +[2024-07-05 17:20:11,198][04005] Num frames 19900... +[2024-07-05 17:20:11,271][04005] Num frames 20000... +[2024-07-05 17:20:11,346][04005] Num frames 20100... +[2024-07-05 17:20:11,431][04005] Num frames 20200... +[2024-07-05 17:20:11,503][04005] Num frames 20300... +[2024-07-05 17:20:11,576][04005] Num frames 20400... +[2024-07-05 17:20:11,651][04005] Num frames 20500... +[2024-07-05 17:20:11,725][04005] Num frames 20600... +[2024-07-05 17:20:11,798][04005] Num frames 20700... +[2024-07-05 17:20:11,870][04005] Num frames 20800... +[2024-07-05 17:20:11,943][04005] Num frames 20900... +[2024-07-05 17:20:12,022][04005] Avg episode rewards: #0: 57.434, true rewards: #0: 20.935 +[2024-07-05 17:20:12,023][04005] Avg episode reward: 57.434, avg true_objective: 20.935 +[2024-07-05 17:20:33,642][04005] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/replay.mp4! +[2024-07-05 17:23:13,711][04005] Loading existing experiment configuration from /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/config.json +[2024-07-05 17:23:13,712][04005] Overriding arg 'num_workers' with value 1 passed from command line +[2024-07-05 17:23:13,712][04005] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-07-05 17:23:13,713][04005] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-07-05 17:23:13,713][04005] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-07-05 17:23:13,714][04005] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-07-05 17:23:13,714][04005] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-07-05 17:23:13,715][04005] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-07-05 17:23:13,715][04005] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-07-05 17:23:13,716][04005] Adding new argument 'hf_repository'='ra9hu/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-07-05 17:23:13,716][04005] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-07-05 17:23:13,717][04005] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-07-05 17:23:13,717][04005] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-07-05 17:23:13,718][04005] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-07-05 17:23:13,718][04005] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-07-05 17:23:13,735][04005] RunningMeanStd input shape: (3, 72, 128) +[2024-07-05 17:23:13,736][04005] RunningMeanStd input shape: (1,) +[2024-07-05 17:23:13,744][04005] Num input channels: 3 +[2024-07-05 17:23:13,750][04005] Convolutional layer output size: 4608 +[2024-07-05 17:23:13,762][04005] Policy head output size: 512 +[2024-07-05 17:23:13,829][04005] Loading state from checkpoint /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/checkpoint_p0/checkpoint_000017091_70004736.pth... +[2024-07-05 17:23:14,511][04005] Num frames 100... +[2024-07-05 17:23:14,603][04005] Num frames 200... +[2024-07-05 17:23:14,696][04005] Num frames 300... +[2024-07-05 17:23:14,787][04005] Num frames 400... +[2024-07-05 17:23:14,884][04005] Num frames 500... +[2024-07-05 17:23:14,978][04005] Num frames 600... +[2024-07-05 17:23:15,065][04005] Num frames 700... +[2024-07-05 17:23:15,156][04005] Num frames 800... +[2024-07-05 17:23:15,233][04005] Num frames 900... +[2024-07-05 17:23:15,310][04005] Num frames 1000... +[2024-07-05 17:23:15,387][04005] Num frames 1100... +[2024-07-05 17:23:15,458][04005] Num frames 1200... +[2024-07-05 17:23:15,531][04005] Num frames 1300... +[2024-07-05 17:23:15,604][04005] Num frames 1400... +[2024-07-05 17:23:15,683][04005] Num frames 1500... +[2024-07-05 17:23:15,755][04005] Num frames 1600... +[2024-07-05 17:23:15,834][04005] Num frames 1700... +[2024-07-05 17:23:15,910][04005] Num frames 1800... +[2024-07-05 17:23:15,983][04005] Num frames 1900... +[2024-07-05 17:23:16,059][04005] Num frames 2000... +[2024-07-05 17:23:16,138][04005] Num frames 2100... +[2024-07-05 17:23:16,189][04005] Avg episode rewards: #0: 58.999, true rewards: #0: 21.000 +[2024-07-05 17:23:16,191][04005] Avg episode reward: 58.999, avg true_objective: 21.000 +[2024-07-05 17:23:16,266][04005] Num frames 2200... +[2024-07-05 17:23:16,340][04005] Num frames 2300... +[2024-07-05 17:23:16,414][04005] Num frames 2400... +[2024-07-05 17:23:16,486][04005] Num frames 2500... +[2024-07-05 17:23:16,558][04005] Num frames 2600... +[2024-07-05 17:23:16,635][04005] Num frames 2700... +[2024-07-05 17:23:16,711][04005] Num frames 2800... +[2024-07-05 17:23:16,786][04005] Num frames 2900... +[2024-07-05 17:23:16,867][04005] Num frames 3000... +[2024-07-05 17:23:16,942][04005] Num frames 3100... +[2024-07-05 17:23:17,016][04005] Num frames 3200... +[2024-07-05 17:23:17,087][04005] Num frames 3300... +[2024-07-05 17:23:17,161][04005] Num frames 3400... +[2024-07-05 17:23:17,234][04005] Num frames 3500... +[2024-07-05 17:23:17,309][04005] Num frames 3600... +[2024-07-05 17:23:17,383][04005] Num frames 3700... +[2024-07-05 17:23:17,457][04005] Num frames 3800... +[2024-07-05 17:23:17,530][04005] Num frames 3900... +[2024-07-05 17:23:17,604][04005] Num frames 4000... +[2024-07-05 17:23:17,677][04005] Num frames 4100... +[2024-07-05 17:23:17,753][04005] Num frames 4200... +[2024-07-05 17:23:17,805][04005] Avg episode rewards: #0: 62.499, true rewards: #0: 21.000 +[2024-07-05 17:23:17,806][04005] Avg episode reward: 62.499, avg true_objective: 21.000 +[2024-07-05 17:23:17,882][04005] Num frames 4300... +[2024-07-05 17:23:17,954][04005] Num frames 4400... +[2024-07-05 17:23:18,028][04005] Num frames 4500... +[2024-07-05 17:23:18,101][04005] Num frames 4600... +[2024-07-05 17:23:18,175][04005] Num frames 4700... +[2024-07-05 17:23:18,248][04005] Num frames 4800... +[2024-07-05 17:23:18,322][04005] Num frames 4900... +[2024-07-05 17:23:18,396][04005] Num frames 5000... +[2024-07-05 17:23:18,469][04005] Num frames 5100... +[2024-07-05 17:23:18,542][04005] Num frames 5200... +[2024-07-05 17:23:18,616][04005] Num frames 5300... +[2024-07-05 17:23:18,697][04005] Num frames 5400... +[2024-07-05 17:23:18,771][04005] Num frames 5500... +[2024-07-05 17:23:18,842][04005] Num frames 5600... +[2024-07-05 17:23:18,914][04005] Num frames 5700... +[2024-07-05 17:23:18,986][04005] Num frames 5800... +[2024-07-05 17:23:19,060][04005] Num frames 5900... +[2024-07-05 17:23:19,132][04005] Num frames 6000... +[2024-07-05 17:23:19,203][04005] Num frames 6100... +[2024-07-05 17:23:19,273][04005] Num frames 6200... +[2024-07-05 17:23:19,347][04005] Num frames 6300... +[2024-07-05 17:23:19,398][04005] Avg episode rewards: #0: 59.999, true rewards: #0: 21.000 +[2024-07-05 17:23:19,399][04005] Avg episode reward: 59.999, avg true_objective: 21.000 +[2024-07-05 17:23:19,472][04005] Num frames 6400... +[2024-07-05 17:23:19,542][04005] Num frames 6500... +[2024-07-05 17:23:19,616][04005] Num frames 6600... +[2024-07-05 17:23:19,686][04005] Num frames 6700... +[2024-07-05 17:23:19,757][04005] Num frames 6800... +[2024-07-05 17:23:19,829][04005] Num frames 6900... +[2024-07-05 17:23:19,901][04005] Num frames 7000... +[2024-07-05 17:23:19,972][04005] Num frames 7100... +[2024-07-05 17:23:20,043][04005] Num frames 7200... +[2024-07-05 17:23:20,113][04005] Num frames 7300... +[2024-07-05 17:23:20,183][04005] Num frames 7400... +[2024-07-05 17:23:20,253][04005] Num frames 7500... +[2024-07-05 17:23:20,323][04005] Num frames 7600... +[2024-07-05 17:23:20,392][04005] Num frames 7700... +[2024-07-05 17:23:20,463][04005] Num frames 7800... +[2024-07-05 17:23:20,536][04005] Num frames 7900... +[2024-07-05 17:23:20,613][04005] Avg episode rewards: #0: 56.829, true rewards: #0: 19.830 +[2024-07-05 17:23:20,615][04005] Avg episode reward: 56.829, avg true_objective: 19.830 +[2024-07-05 17:23:20,663][04005] Num frames 8000... +[2024-07-05 17:23:20,735][04005] Num frames 8100... +[2024-07-05 17:23:20,812][04005] Num frames 8200... +[2024-07-05 17:23:20,886][04005] Num frames 8300... +[2024-07-05 17:23:20,964][04005] Num frames 8400... +[2024-07-05 17:23:21,038][04005] Num frames 8500... +[2024-07-05 17:23:21,114][04005] Num frames 8600... +[2024-07-05 17:23:21,186][04005] Num frames 8700... +[2024-07-05 17:23:21,266][04005] Num frames 8800... +[2024-07-05 17:23:21,338][04005] Num frames 8900... +[2024-07-05 17:23:21,412][04005] Num frames 9000... +[2024-07-05 17:23:21,485][04005] Num frames 9100... +[2024-07-05 17:23:21,557][04005] Num frames 9200... +[2024-07-05 17:23:21,629][04005] Num frames 9300... +[2024-07-05 17:23:21,702][04005] Num frames 9400... +[2024-07-05 17:23:21,778][04005] Num frames 9500... +[2024-07-05 17:23:21,849][04005] Num frames 9600... +[2024-07-05 17:23:21,922][04005] Num frames 9700... +[2024-07-05 17:23:21,994][04005] Num frames 9800... +[2024-07-05 17:23:22,069][04005] Num frames 9900... +[2024-07-05 17:23:22,142][04005] Num frames 10000... +[2024-07-05 17:23:22,219][04005] Avg episode rewards: #0: 55.863, true rewards: #0: 20.064 +[2024-07-05 17:23:22,220][04005] Avg episode reward: 55.863, avg true_objective: 20.064 +[2024-07-05 17:23:22,272][04005] Num frames 10100... +[2024-07-05 17:23:22,345][04005] Num frames 10200... +[2024-07-05 17:23:22,418][04005] Num frames 10300... +[2024-07-05 17:23:22,496][04005] Num frames 10400... +[2024-07-05 17:23:22,569][04005] Num frames 10500... +[2024-07-05 17:23:22,640][04005] Num frames 10600... +[2024-07-05 17:23:22,711][04005] Num frames 10700... +[2024-07-05 17:23:22,785][04005] Num frames 10800... +[2024-07-05 17:23:22,859][04005] Num frames 10900... +[2024-07-05 17:23:22,931][04005] Num frames 11000... +[2024-07-05 17:23:23,002][04005] Num frames 11100... +[2024-07-05 17:23:23,074][04005] Num frames 11200... +[2024-07-05 17:23:23,144][04005] Num frames 11300... +[2024-07-05 17:23:23,216][04005] Num frames 11400... +[2024-07-05 17:23:23,291][04005] Num frames 11500... +[2024-07-05 17:23:23,364][04005] Num frames 11600... +[2024-07-05 17:23:23,439][04005] Num frames 11700... +[2024-07-05 17:23:23,509][04005] Num frames 11800... +[2024-07-05 17:23:23,581][04005] Num frames 11900... +[2024-07-05 17:23:23,658][04005] Num frames 12000... +[2024-07-05 17:23:23,735][04005] Num frames 12100... +[2024-07-05 17:23:23,812][04005] Avg episode rewards: #0: 56.719, true rewards: #0: 20.220 +[2024-07-05 17:23:23,814][04005] Avg episode reward: 56.719, avg true_objective: 20.220 +[2024-07-05 17:23:23,863][04005] Num frames 12200... +[2024-07-05 17:23:23,933][04005] Num frames 12300... +[2024-07-05 17:23:24,006][04005] Num frames 12400... +[2024-07-05 17:23:24,079][04005] Num frames 12500... +[2024-07-05 17:23:24,150][04005] Num frames 12600... +[2024-07-05 17:23:24,219][04005] Num frames 12700... +[2024-07-05 17:23:24,289][04005] Num frames 12800... +[2024-07-05 17:23:24,361][04005] Num frames 12900... +[2024-07-05 17:23:24,431][04005] Num frames 13000... +[2024-07-05 17:23:24,501][04005] Num frames 13100... +[2024-07-05 17:23:24,574][04005] Num frames 13200... +[2024-07-05 17:23:24,642][04005] Num frames 13300... +[2024-07-05 17:23:24,714][04005] Num frames 13400... +[2024-07-05 17:23:24,786][04005] Num frames 13500... +[2024-07-05 17:23:24,859][04005] Num frames 13600... +[2024-07-05 17:23:24,932][04005] Num frames 13700... +[2024-07-05 17:23:25,006][04005] Num frames 13800... +[2024-07-05 17:23:25,081][04005] Num frames 13900... +[2024-07-05 17:23:25,156][04005] Num frames 14000... +[2024-07-05 17:23:25,227][04005] Num frames 14100... +[2024-07-05 17:23:25,301][04005] Num frames 14200... +[2024-07-05 17:23:25,380][04005] Avg episode rewards: #0: 57.330, true rewards: #0: 20.331 +[2024-07-05 17:23:25,381][04005] Avg episode reward: 57.330, avg true_objective: 20.331 +[2024-07-05 17:23:25,430][04005] Num frames 14300... +[2024-07-05 17:23:25,502][04005] Num frames 14400... +[2024-07-05 17:23:25,574][04005] Num frames 14500... +[2024-07-05 17:23:25,646][04005] Num frames 14600... +[2024-07-05 17:23:25,716][04005] Num frames 14700... +[2024-07-05 17:23:25,802][04005] Num frames 14800... +[2024-07-05 17:23:25,876][04005] Num frames 14900... +[2024-07-05 17:23:25,950][04005] Num frames 15000... +[2024-07-05 17:23:26,022][04005] Num frames 15100... +[2024-07-05 17:23:26,096][04005] Num frames 15200... +[2024-07-05 17:23:26,171][04005] Num frames 15300... +[2024-07-05 17:23:26,243][04005] Num frames 15400... +[2024-07-05 17:23:26,315][04005] Num frames 15500... +[2024-07-05 17:23:26,389][04005] Num frames 15600... +[2024-07-05 17:23:26,464][04005] Num frames 15700... +[2024-07-05 17:23:26,538][04005] Num frames 15800... +[2024-07-05 17:23:26,614][04005] Num frames 15900... +[2024-07-05 17:23:26,691][04005] Num frames 16000... +[2024-07-05 17:23:26,793][04005] Num frames 16100... +[2024-07-05 17:23:26,897][04005] Num frames 16200... +[2024-07-05 17:23:26,972][04005] Num frames 16300... +[2024-07-05 17:23:27,050][04005] Avg episode rewards: #0: 57.789, true rewards: #0: 20.415 +[2024-07-05 17:23:27,051][04005] Avg episode reward: 57.789, avg true_objective: 20.415 +[2024-07-05 17:23:27,107][04005] Num frames 16400... +[2024-07-05 17:23:27,189][04005] Num frames 16500... +[2024-07-05 17:23:27,260][04005] Num frames 16600... +[2024-07-05 17:23:27,335][04005] Num frames 16700... +[2024-07-05 17:23:27,412][04005] Num frames 16800... +[2024-07-05 17:23:27,486][04005] Num frames 16900... +[2024-07-05 17:23:27,559][04005] Num frames 17000... +[2024-07-05 17:23:27,632][04005] Num frames 17100... +[2024-07-05 17:23:27,704][04005] Num frames 17200... +[2024-07-05 17:23:27,776][04005] Num frames 17300... +[2024-07-05 17:23:27,846][04005] Num frames 17400... +[2024-07-05 17:23:27,919][04005] Num frames 17500... +[2024-07-05 17:23:27,994][04005] Num frames 17600... +[2024-07-05 17:23:28,068][04005] Num frames 17700... +[2024-07-05 17:23:28,141][04005] Num frames 17800... +[2024-07-05 17:23:28,213][04005] Num frames 17900... +[2024-07-05 17:23:28,287][04005] Num frames 18000... +[2024-07-05 17:23:28,361][04005] Num frames 18100... +[2024-07-05 17:23:28,432][04005] Num frames 18200... +[2024-07-05 17:23:28,506][04005] Num frames 18300... +[2024-07-05 17:23:28,582][04005] Num frames 18400... +[2024-07-05 17:23:28,660][04005] Avg episode rewards: #0: 57.590, true rewards: #0: 20.480 +[2024-07-05 17:23:28,661][04005] Avg episode reward: 57.590, avg true_objective: 20.480 +[2024-07-05 17:23:28,711][04005] Num frames 18500... +[2024-07-05 17:23:28,783][04005] Num frames 18600... +[2024-07-05 17:23:28,854][04005] Num frames 18700... +[2024-07-05 17:23:28,928][04005] Num frames 18800... +[2024-07-05 17:23:29,002][04005] Num frames 18900... +[2024-07-05 17:23:29,077][04005] Num frames 19000... +[2024-07-05 17:23:29,148][04005] Num frames 19100... +[2024-07-05 17:23:29,229][04005] Num frames 19200... +[2024-07-05 17:23:29,303][04005] Num frames 19300... +[2024-07-05 17:23:29,373][04005] Num frames 19400... +[2024-07-05 17:23:29,445][04005] Num frames 19500... +[2024-07-05 17:23:29,516][04005] Num frames 19600... +[2024-07-05 17:23:29,586][04005] Num frames 19700... +[2024-07-05 17:23:29,657][04005] Num frames 19800... +[2024-07-05 17:23:29,728][04005] Num frames 19900... +[2024-07-05 17:23:29,800][04005] Num frames 20000... +[2024-07-05 17:23:29,872][04005] Num frames 20100... +[2024-07-05 17:23:29,942][04005] Num frames 20200... +[2024-07-05 17:23:30,012][04005] Num frames 20300... +[2024-07-05 17:23:30,089][04005] Num frames 20400... +[2024-07-05 17:23:30,162][04005] Num frames 20500... +[2024-07-05 17:23:30,244][04005] Avg episode rewards: #0: 57.831, true rewards: #0: 20.532 +[2024-07-05 17:23:30,245][04005] Avg episode reward: 57.831, avg true_objective: 20.532 +[2024-07-05 17:23:51,410][04005] Replay video saved to /home/raghu/DL/topics/RL/unit8B-AsyncPPO-SampleFactory/train_dir/conv_resnet/replay.mp4!