{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.372818112373352,
"min": 1.3610069751739502,
"max": 3.295746088027954,
"count": 2406
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 27544.22265625,
"min": 13481.908203125,
"max": 116511.125,
"count": 2406
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 44.28181818181818,
"min": 40.8235294117647,
"max": 999.0,
"count": 2406
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19484.0,
"min": 16780.0,
"max": 23440.0,
"count": 2406
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1616.2424260396367,
"min": 1190.5438488044906,
"max": 1685.6915875872412,
"count": 2386
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 355573.3337287201,
"min": 2384.762442138258,
"max": 374386.6988585349,
"count": 2386
},
"SoccerTwos.Step.mean": {
"value": 24059983.0,
"min": 9422.0,
"max": 24059983.0,
"count": 2406
},
"SoccerTwos.Step.sum": {
"value": 24059983.0,
"min": 9422.0,
"max": 24059983.0,
"count": 2406
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.044170547276735306,
"min": -0.1438433974981308,
"max": 0.1603071093559265,
"count": 2406
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -9.761691093444824,
"min": -24.165691375732422,
"max": 30.29804229736328,
"count": 2406
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.04460342600941658,
"min": -0.146144837141037,
"max": 0.16351306438446045,
"count": 2406
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -9.857357025146484,
"min": -24.55233383178711,
"max": 30.903968811035156,
"count": 2406
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 2406
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 2406
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.11660633529473215,
"min": -0.5490857149873462,
"max": 0.45455000166957443,
"count": 2406
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -25.770000100135803,
"min": -57.58159965276718,
"max": 67.27340024709702,
"count": 2406
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.11660633529473215,
"min": -0.5490857149873462,
"max": 0.45455000166957443,
"count": 2406
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -25.770000100135803,
"min": -57.58159965276718,
"max": 67.27340024709702,
"count": 2406
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 2406
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 2406
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.019324933231109752,
"min": 0.010011145524913445,
"max": 0.02468691694860657,
"count": 1163
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.019324933231109752,
"min": 0.010011145524913445,
"max": 0.02468691694860657,
"count": 1163
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1097671389579773,
"min": 4.830325057506949e-05,
"max": 0.1255221885939439,
"count": 1163
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1097671389579773,
"min": 4.830325057506949e-05,
"max": 0.1255221885939439,
"count": 1163
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.11143294448653857,
"min": 4.8788184418905685e-05,
"max": 0.12767568851510683,
"count": 1163
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.11143294448653857,
"min": 4.8788184418905685e-05,
"max": 0.12767568851510683,
"count": 1163
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 1163
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 1163
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 1163
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 1163
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 1163
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 1163
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1696684423",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/home/calle/mambaforge/envs/rl/bin/mlagents-learn config/poca/SoccerTwos.yaml --env training-envs-executables/SoccerTwos.x86_64 --run-id SoccerTwos --no-graphics --force",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.0+cu121",
"numpy_version": "1.21.2",
"end_time_seconds": "1696753741"
},
"total": 69317.840842983,
"count": 1,
"self": 4.200484507993679,
"children": {
"run_training.setup": {
"total": 0.027740798999730032,
"count": 1,
"self": 0.027740798999730032
},
"TrainerController.start_learning": {
"total": 69313.612617676,
"count": 1,
"self": 38.41506820991344,
"children": {
"TrainerController._reset_env": {
"total": 7.527282390986329,
"count": 121,
"self": 7.527282390986329
},
"TrainerController.advance": {
"total": 69267.21945307309,
"count": 1659962,
"self": 41.54071615994326,
"children": {
"env_step": {
"total": 53528.12489961531,
"count": 1659962,
"self": 37496.70817228866,
"children": {
"SubprocessEnvManager._take_step": {
"total": 16008.018884459463,
"count": 1659962,
"self": 315.22460969659915,
"children": {
"TorchPolicy.evaluate": {
"total": 15692.794274762864,
"count": 3027070,
"self": 15692.794274762864
}
}
},
"workers": {
"total": 23.397842867188956,
"count": 1659962,
"self": 0.0,
"children": {
"worker_root": {
"total": 69216.53500549789,
"count": 1659962,
"is_parallel": true,
"self": 36265.40393812658,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.004964599999766506,
"count": 2,
"is_parallel": true,
"self": 0.0016121999997267267,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.003352400000039779,
"count": 8,
"is_parallel": true,
"self": 0.003352400000039779
}
}
},
"UnityEnvironment.step": {
"total": 0.051478497999596584,
"count": 1,
"is_parallel": true,
"self": 0.001723199999105418,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004969999999957508,
"count": 1,
"is_parallel": true,
"self": 0.0004969999999957508
},
"communicator.exchange": {
"total": 0.044944899000256555,
"count": 1,
"is_parallel": true,
"self": 0.044944899000256555
},
"steps_from_proto": {
"total": 0.00431339900023886,
"count": 2,
"is_parallel": true,
"self": 0.00090240000099584,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00341099899924302,
"count": 8,
"is_parallel": true,
"self": 0.00341099899924302
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 32950.86803147133,
"count": 1659961,
"is_parallel": true,
"self": 1041.6733843843394,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 700.0652687746642,
"count": 1659961,
"is_parallel": true,
"self": 700.0652687746642
},
"communicator.exchange": {
"total": 27563.45005935148,
"count": 1659961,
"is_parallel": true,
"self": 27563.45005935148
},
"steps_from_proto": {
"total": 3645.679318960845,
"count": 3319922,
"is_parallel": true,
"self": 775.0794357809136,
"children": {
"_process_rank_one_or_two_observation": {
"total": 2870.5998831799316,
"count": 13279688,
"is_parallel": true,
"self": 2870.5998831799316
}
}
}
}
},
"steps_from_proto": {
"total": 0.263035899980423,
"count": 240,
"is_parallel": true,
"self": 0.05318589999569667,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.2098499999847263,
"count": 960,
"is_parallel": true,
"self": 0.2098499999847263
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 15697.553837297839,
"count": 1659962,
"self": 277.09851129868,
"children": {
"process_trajectory": {
"total": 7099.193064037109,
"count": 1659962,
"self": 7080.448951666099,
"children": {
"RLTrainer._checkpoint": {
"total": 18.744112371010488,
"count": 48,
"self": 18.744112371010488
}
}
},
"_update_policy": {
"total": 8321.262261962049,
"count": 1164,
"self": 4029.9179502190127,
"children": {
"TorchPOCAOptimizer.update": {
"total": 4291.344311743036,
"count": 34920,
"self": 4291.344311743036
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.00000761449337e-06,
"count": 1,
"self": 1.00000761449337e-06
},
"TrainerController._save_models": {
"total": 0.45081300199672114,
"count": 1,
"self": 0.012058000997058116,
"children": {
"RLTrainer._checkpoint": {
"total": 0.438755000999663,
"count": 1,
"self": 0.438755000999663
}
}
}
}
}
}
}