{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.8080255389213562,
"min": 0.8006569743156433,
"max": 2.880449056625366,
"count": 50
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 7795.0224609375,
"min": 7668.88037109375,
"max": 29530.36328125,
"count": 50
},
"SnowballTarget.Step.mean": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Step.sum": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.70994758605957,
"min": 0.10662788897752762,
"max": 13.713254928588867,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 2810.539306640625,
"min": 20.685810089111328,
"max": 2811.21728515625,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 10945.0,
"min": 8756.0,
"max": 10945.0,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 26.98181818181818,
"min": 3.340909090909091,
"max": 27.295454545454547,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1484.0,
"min": 147.0,
"max": 1484.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 26.98181818181818,
"min": 3.340909090909091,
"max": 27.295454545454547,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1484.0,
"min": 147.0,
"max": 1484.0,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.06976127059726209,
"min": 0.06030115588338744,
"max": 0.07832502534295724,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.13952254119452417,
"min": 0.12060231176677488,
"max": 0.22978644694190808,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.17290047205546322,
"min": 0.10760244184610253,
"max": 0.274918874689177,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.34580094411092643,
"min": 0.21520488369220506,
"max": 0.8172484678672809,
"count": 50
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 3.0528989824000066e-06,
"min": 3.0528989824000066e-06,
"max": 0.00029609280130240005,
"count": 50
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 6.105797964800013e-06,
"min": 6.105797964800013e-06,
"max": 0.0008367984210671999,
"count": 50
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.1010176,
"min": 0.1010176,
"max": 0.19869760000000006,
"count": 50
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.2020352,
"min": 0.2020352,
"max": 0.5789328,
"count": 50
},
"SnowballTarget.Policy.Beta.mean": {
"value": 6.07782400000001e-05,
"min": 6.07782400000001e-05,
"max": 0.004935010240000001,
"count": 50
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.0001215564800000002,
"min": 0.0001215564800000002,
"max": 0.013948746719999999,
"count": 50
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1696607960",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.0.1+cu118",
"numpy_version": "1.21.2",
"end_time_seconds": "1696609219"
},
"total": 1259.1893315080001,
"count": 1,
"self": 0.42507326299983106,
"children": {
"run_training.setup": {
"total": 0.07503764099999444,
"count": 1,
"self": 0.07503764099999444
},
"TrainerController.start_learning": {
"total": 1258.6892206040002,
"count": 1,
"self": 1.5029157839908294,
"children": {
"TrainerController._reset_env": {
"total": 7.609821055999987,
"count": 1,
"self": 7.609821055999987
},
"TrainerController.advance": {
"total": 1249.48331269701,
"count": 45474,
"self": 0.6869703100287552,
"children": {
"env_step": {
"total": 1248.7963423869812,
"count": 45474,
"self": 908.6577076179701,
"children": {
"SubprocessEnvManager._take_step": {
"total": 339.40114673201265,
"count": 45474,
"self": 3.783818501012661,
"children": {
"TorchPolicy.evaluate": {
"total": 335.617328231,
"count": 45474,
"self": 335.617328231
}
}
},
"workers": {
"total": 0.7374880369984567,
"count": 45474,
"self": 0.0,
"children": {
"worker_root": {
"total": 1255.476642340989,
"count": 45474,
"is_parallel": true,
"self": 621.9741630329793,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0021357239999133526,
"count": 1,
"is_parallel": true,
"self": 0.0006825169999729042,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014532069999404484,
"count": 10,
"is_parallel": true,
"self": 0.0014532069999404484
}
}
},
"UnityEnvironment.step": {
"total": 0.09403989899999488,
"count": 1,
"is_parallel": true,
"self": 0.0006582159999197756,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0003922180000017761,
"count": 1,
"is_parallel": true,
"self": 0.0003922180000017761
},
"communicator.exchange": {
"total": 0.09050997899998947,
"count": 1,
"is_parallel": true,
"self": 0.09050997899998947
},
"steps_from_proto": {
"total": 0.0024794860000838526,
"count": 1,
"is_parallel": true,
"self": 0.0005248260000598748,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0019546600000239778,
"count": 10,
"is_parallel": true,
"self": 0.0019546600000239778
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 633.5024793080097,
"count": 45473,
"is_parallel": true,
"self": 26.95632814703731,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 14.103172915010305,
"count": 45473,
"is_parallel": true,
"self": 14.103172915010305
},
"communicator.exchange": {
"total": 493.55815578197235,
"count": 45473,
"is_parallel": true,
"self": 493.55815578197235
},
"steps_from_proto": {
"total": 98.88482246398974,
"count": 45473,
"is_parallel": true,
"self": 18.373831193906994,
"children": {
"_process_rank_one_or_two_observation": {
"total": 80.51099127008274,
"count": 454730,
"is_parallel": true,
"self": 80.51099127008274
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.024999968052725e-05,
"count": 1,
"self": 7.024999968052725e-05,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 1236.751043562145,
"count": 1535132,
"is_parallel": true,
"self": 33.66566206121547,
"children": {
"process_trajectory": {
"total": 817.2834499789288,
"count": 1535132,
"is_parallel": true,
"self": 815.1829018469288,
"children": {
"RLTrainer._checkpoint": {
"total": 2.1005481320000854,
"count": 10,
"is_parallel": true,
"self": 2.1005481320000854
}
}
},
"_update_policy": {
"total": 385.8019315220007,
"count": 113,
"is_parallel": true,
"self": 168.56510877102403,
"children": {
"TorchPPOOptimizer.update": {
"total": 217.23682275097667,
"count": 11523,
"is_parallel": true,
"self": 217.23682275097667
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.09310081699959483,
"count": 1,
"self": 0.0008990289998109802,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09220178799978385,
"count": 1,
"self": 0.09220178799978385
}
}
}
}
}
}
}
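
For reference, a minimal Python sketch of reading these stats back out of the log. The path results/SnowballTarget1/run_logs/timers.json is an assumption based on the --run-id in the command line above and ML-Agents' usual output layout; it is not stated anywhere in this file.

# Minimal sketch (not part of the original run logs) of loading the
# timers JSON above and pulling out a few headline numbers.
# Assumption: the file lives at the conventional ML-Agents location
# results/<run-id>/run_logs/timers.json for run-id SnowballTarget1.
import json

with open("results/SnowballTarget1/run_logs/timers.json") as f:
    timers = json.load(f)

# Each entry under "gauges" records the latest value plus the running
# min/max over `count` summary writes.
reward = timers["gauges"]["SnowballTarget.Environment.CumulativeReward.mean"]
print(f"final mean reward: {reward['value']:.2f} "
      f"(min {reward['min']:.2f}, max {reward['max']:.2f} "
      f"over {reward['count']} summaries)")

# Top-level "total" is the wall-clock duration of the whole run, in seconds.
print(f"total training time: {timers['total']:.1f}s")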