{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.4921015501022339, "min": 1.4691133499145508, "max": 3.2957165241241455, "count": 1551 }, "SoccerTwos.Policy.Entropy.sum": { "value": 30462.744140625, "min": 26408.48828125, "max": 154321.078125, "count": 1551 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 59.45783132530121, "min": 36.661654135338345, "max": 999.0, "count": 1551 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19740.0, "min": 16344.0, "max": 24224.0, "count": 1551 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1576.1408347423596, "min": 1183.7826067404699, "max": 1593.1353287199727, "count": 1495 }, "SoccerTwos.Self-play.ELO.sum": { "value": 261639.3785672317, "min": 2367.5652134809397, "max": 414523.60237013956, "count": 1495 }, "SoccerTwos.Step.mean": { "value": 15509946.0, "min": 9128.0, "max": 15509946.0, "count": 1551 }, "SoccerTwos.Step.sum": { "value": 15509946.0, "min": 9128.0, "max": 15509946.0, "count": 1551 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.04154122993350029, "min": -0.11701029539108276, "max": 0.1731089949607849, "count": 1551 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -6.854302883148193, "min": -22.7357177734375, "max": 30.331287384033203, "count": 1551 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.039187878370285034, "min": -0.11552216112613678, "max": 0.17324113845825195, "count": 1551 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -6.466000080108643, "min": -23.004806518554688, "max": 30.663681030273438, "count": 1551 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1551 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 1551 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.12841212099248714, "min": -0.6284636340358041, "max": 0.41016470684724693, "count": 1551 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -21.187999963760376, "min": -60.05379956960678, "max": 57.90720021724701, "count": 1551 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.12841212099248714, "min": -0.6284636340358041, "max": 0.41016470684724693, "count": 1551 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -21.187999963760376, "min": -60.05379956960678, "max": 57.90720021724701, "count": 1551 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1551 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 1551 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.017143514207176243, "min": 0.01058117716262738, "max": 0.025209768011700363, "count": 748 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.017143514207176243, "min": 0.01058117716262738, "max": 0.025209768011700363, "count": 748 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10296346992254257, "min": 6.034289934101859e-07, "max": 0.13135499681035678, "count": 748 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10296346992254257, "min": 6.034289934101859e-07, "max": 0.13135499681035678, "count": 748 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10444200138250986, "min": 6.022205136938889e-07, "max": 0.13411133885383605, "count": 748 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10444200138250986, "min": 6.022205136938889e-07, "max": 0.13411133885383605, "count": 748 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 748 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 748 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 748 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 748 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 748 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 748 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1726733581", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\strokeRehab2\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --force", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.4.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1726769325" }, "total": 35746.4820196, "count": 1, "self": 0.10041909999563359, "children": { "run_training.setup": { "total": 0.12742990000015197, "count": 1, "self": 0.12742990000015197 }, "TrainerController.start_learning": { "total": 35746.254170600005, "count": 1, "self": 26.430566100643773, "children": { "TrainerController._reset_env": { "total": 6.083992200014109, "count": 78, "self": 6.083992200014109 }, "TrainerController.advance": { "total": 35713.596034799346, "count": 1071733, "self": 22.060118399051134, "children": { "env_step": { "total": 17056.924595200493, "count": 1071733, "self": 13278.25609389992, "children": { "SubprocessEnvManager._take_step": { "total": 3764.1669275997756, "count": 1071733, "self": 121.46064880429367, "children": { "TorchPolicy.evaluate": { "total": 3642.706278795482, "count": 1955344, "self": 3642.706278795482 } } }, "workers": { "total": 14.501573700797962, "count": 1071733, "self": 0.0, "children": { "worker_root": { "total": 35709.0399978004, "count": 1071733, "is_parallel": true, "self": 25217.01502999912, "children": { "steps_from_proto": { "total": 0.1263445999607029, "count": 156, "is_parallel": true, "self": 0.026834199902623368, "children": { "_process_rank_one_or_two_observation": { "total": 0.09951040005807954, "count": 624, "is_parallel": true, "self": 0.09951040005807954 } } }, "UnityEnvironment.step": { "total": 10491.89862320132, "count": 1071733, "is_parallel": true, "self": 539.008460898096, "children": { "UnityEnvironment._generate_step_input": { "total": 438.1818923024848, "count": 1071733, "is_parallel": true, "self": 438.1818923024848 }, "communicator.exchange": { "total": 7645.466054198794, "count": 1071733, "is_parallel": true, "self": 7645.466054198794 }, "steps_from_proto": { "total": 1869.242215801944, "count": 2143466, "is_parallel": true, "self": 392.3517038052396, "children": { "_process_rank_one_or_two_observation": { "total": 1476.8905119967044, "count": 8573864, "is_parallel": true, "self": 1476.8905119967044 } } } } } } } } } } }, "trainer_advance": { "total": 18634.6113211998, "count": 1071733, "self": 178.69945719789393, "children": { "process_trajectory": { "total": 3682.171064701863, "count": 1071733, "self": 3677.8730240018704, "children": { "RLTrainer._checkpoint": { "total": 4.298040699992725, "count": 31, "self": 4.298040699992725 } } }, "_update_policy": { "total": 14773.740799300045, "count": 748, "self": 1997.9992662000204, "children": { "TorchPOCAOptimizer.update": { "total": 12775.741533100025, "count": 22440, "self": 12775.741533100025 } } } } } } }, "trainer_threads": { "total": 1.4999968698248267e-06, "count": 1, "self": 1.4999968698248267e-06 }, "TrainerController._save_models": { "total": 0.14357600000221282, "count": 1, "self": 0.018747600006463472, "children": { "RLTrainer._checkpoint": { "total": 0.12482839999574935, "count": 1, "self": 0.12482839999574935 } } } } } } }