{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.8080255389213562,
"min": 0.8006569743156433,
"max": 2.880449056625366,
"count": 50
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 7795.0224609375,
"min": 7668.88037109375,
"max": 29530.36328125,
"count": 50
},
"SnowballTarget.Step.mean": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Step.sum": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.70994758605957,
"min": 0.10662788897752762,
"max": 13.713254928588867,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 2810.539306640625,
"min": 20.685810089111328,
"max": 2811.21728515625,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 10945.0,
"min": 8756.0,
"max": 10945.0,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 26.98181818181818,
"min": 3.340909090909091,
"max": 27.295454545454547,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1484.0,
"min": 147.0,
"max": 1484.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 26.98181818181818,
"min": 3.340909090909091,
"max": 27.295454545454547,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1484.0,
"min": 147.0,
"max": 1484.0,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.06976127059726209,
"min": 0.06030115588338744,
"max": 0.07832502534295724,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.13952254119452417,
"min": 0.12060231176677488,
"max": 0.22978644694190808,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.17290047205546322,
"min": 0.10760244184610253,
"max": 0.274918874689177,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.34580094411092643,
"min": 0.21520488369220506,
"max": 0.8172484678672809,
"count": 50
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 3.0528989824000066e-06,
"min": 3.0528989824000066e-06,
"max": 0.00029609280130240005,
"count": 50
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 6.105797964800013e-06,
"min": 6.105797964800013e-06,
"max": 0.0008367984210671999,
"count": 50
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.1010176,
"min": 0.1010176,
"max": 0.19869760000000006,
"count": 50
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.2020352,
"min": 0.2020352,
"max": 0.5789328,
"count": 50
},
"SnowballTarget.Policy.Beta.mean": {
"value": 6.07782400000001e-05,
"min": 6.07782400000001e-05,
"max": 0.004935010240000001,
"count": 50
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.0001215564800000002,
"min": 0.0001215564800000002,
"max": 0.013948746719999999,
"count": 50
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1696607960",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.0.1+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1696609219"
},
"total": 1259.1893315080001,
"count": 1,
"self": 0.42507326299983106,
"children": {
"run_training.setup": {
"total": 0.07503764099999444,
"count": 1,
"self": 0.07503764099999444
},
"TrainerController.start_learning": {
"total": 1258.6892206040002,
"count": 1,
"self": 1.5029157839908294,
"children": {
"TrainerController._reset_env": {
"total": 7.609821055999987,
"count": 1,
"self": 7.609821055999987
},
"TrainerController.advance": {
"total": 1249.48331269701,
"count": 45474,
"self": 0.6869703100287552,
"children": {
"env_step": {
"total": 1248.7963423869812,
"count": 45474,
"self": 908.6577076179701,
"children": {
"SubprocessEnvManager._take_step": {
"total": 339.40114673201265,
"count": 45474,
"self": 3.783818501012661,
"children": {
"TorchPolicy.evaluate": {
"total": 335.617328231,
"count": 45474,
"self": 335.617328231
}
}
},
"workers": {
"total": 0.7374880369984567,
"count": 45474,
"self": 0.0,
"children": {
"worker_root": {
"total": 1255.476642340989,
"count": 45474,
"is_parallel": true,
"self": 621.9741630329793,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0021357239999133526,
"count": 1,
"is_parallel": true,
"self": 0.0006825169999729042,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014532069999404484,
"count": 10,
"is_parallel": true,
"self": 0.0014532069999404484
}
}
},
"UnityEnvironment.step": {
"total": 0.09403989899999488,
"count": 1,
"is_parallel": true,
"self": 0.0006582159999197756,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0003922180000017761,
"count": 1,
"is_parallel": true,
"self": 0.0003922180000017761
},
"communicator.exchange": {
"total": 0.09050997899998947,
"count": 1,
"is_parallel": true,
"self": 0.09050997899998947
},
"steps_from_proto": {
"total": 0.0024794860000838526,
"count": 1,
"is_parallel": true,
"self": 0.0005248260000598748,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0019546600000239778,
"count": 10,
"is_parallel": true,
"self": 0.0019546600000239778
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 633.5024793080097,
"count": 45473,
"is_parallel": true,
"self": 26.95632814703731,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 14.103172915010305,
"count": 45473,
"is_parallel": true,
"self": 14.103172915010305
},
"communicator.exchange": {
"total": 493.55815578197235,
"count": 45473,
"is_parallel": true,
"self": 493.55815578197235
},
"steps_from_proto": {
"total": 98.88482246398974,
"count": 45473,
"is_parallel": true,
"self": 18.373831193906994,
"children": {
"_process_rank_one_or_two_observation": {
"total": 80.51099127008274,
"count": 454730,
"is_parallel": true,
"self": 80.51099127008274
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.024999968052725e-05,
"count": 1,
"self": 7.024999968052725e-05,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 1236.751043562145,
"count": 1535132,
"is_parallel": true,
"self": 33.66566206121547,
"children": {
"process_trajectory": {
"total": 817.2834499789288,
"count": 1535132,
"is_parallel": true,
"self": 815.1829018469288,
"children": {
"RLTrainer._checkpoint": {
"total": 2.1005481320000854,
"count": 10,
"is_parallel": true,
"self": 2.1005481320000854
}
}
},
"_update_policy": {
"total": 385.8019315220007,
"count": 113,
"is_parallel": true,
"self": 168.56510877102403,
"children": {
"TorchPPOOptimizer.update": {
"total": 217.23682275097667,
"count": 11523,
"is_parallel": true,
"self": 217.23682275097667
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.09310081699959483,
"count": 1,
"self": 0.0008990289998109802,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09220178799978385,
"count": 1,
"self": 0.09220178799978385
}
}
}
}
}
}
}
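
For reference, a minimal Python sketch of reading these stats back out of the log. The path results/SnowballTarget1/run_logs/timers.json is an assumption based on the --run-id in the command line above and ML-Agents' usual output layout; it is not stated anywhere in this file.

# Minimal sketch (not part of the original run logs) of loading the
# timers JSON above and pulling out a few headline numbers.
# Assumption: the file lives at the conventional ML-Agents location
# results/<run-id>/run_logs/timers.json for run-id SnowballTarget1.
import json

with open("results/SnowballTarget1/run_logs/timers.json") as f:
    timers = json.load(f)

# Each entry under "gauges" records the latest value plus the running
# min/max over `count` summary writes.
reward = timers["gauges"]["SnowballTarget.Environment.CumulativeReward.mean"]
print(f"final mean reward: {reward['value']:.2f} "
      f"(min {reward['min']:.2f}, max {reward['max']:.2f} "
      f"over {reward['count']} summaries)")

# Top-level "total" is the wall-clock duration of the whole run, in seconds.
print(f"total training time: {timers['total']:.1f}s")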