{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.3899741172790527, "min": 1.2276161909103394, "max": 3.2957534790039062, "count": 5000 }, "SoccerTwos.Policy.Entropy.sum": { "value": 28155.31640625, "min": 21521.65625, "max": 105464.0546875, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 60.839506172839506, "min": 38.688, "max": 999.0, "count": 5000 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19712.0, "min": 16152.0, "max": 23520.0, "count": 5000 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1870.4985614119562, "min": 1197.5521306715968, "max": 1916.0121631153543, "count": 4969 }, "SoccerTwos.Self-play.ELO.sum": { "value": 303020.7669487369, "min": 2395.757321517054, "max": 453654.5914092298, "count": 4969 }, "SoccerTwos.Step.mean": { "value": 49999994.0, "min": 9610.0, "max": 49999994.0, "count": 5000 }, "SoccerTwos.Step.sum": { "value": 49999994.0, "min": 9610.0, "max": 49999994.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.03807433322072029, "min": -0.12652990221977234, "max": 0.15911389887332916, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -6.12996768951416, "min": -22.522321701049805, "max": 28.481388092041016, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.038226328790187836, "min": -0.12583494186401367, "max": 0.15776734054088593, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -6.1544389724731445, "min": -22.39862060546875, "max": 28.240352630615234, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.04053664577673681, "min": -0.5775200009346009, "max": 0.4059013699832028, "count": 5000 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -6.5263999700546265, "min": -79.41519975662231, "max": 63.255199670791626, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.04053664577673681, "min": -0.5775200009346009, "max": 0.4059013699832028, "count": 5000 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -6.5263999700546265, "min": -79.41519975662231, "max": 63.255199670791626, "count": 5000 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5000 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.017302362948733694, "min": 0.009955974128630866, "max": 0.026654956691587964, "count": 2424 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.017302362948733694, "min": 0.009955974128630866, "max": 0.026654956691587964, "count": 2424 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.09518437186876932, "min": 9.835654373091529e-06, "max": 0.13156305029988288, "count": 2424 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.09518437186876932, "min": 9.835654373091529e-06, "max": 0.13156305029988288, "count": 2424 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.09614008665084839, "min": 9.792757555260324e-06, "max": 0.13342416509985924, "count": 2424 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.09614008665084839, "min": 9.792757555260324e-06, "max": 0.13342416509985924, "count": 2424 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2424 
}, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2424 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2424 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2424 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2424 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2424 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1704282801", "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:34:57) [MSC v.1936 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\19860\\anaconda3\\envs\\huggingface\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.1.2+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1704436111" }, "total": 153307.24945819995, "count": 1, "self": 3.7698575999820605, "children": { "run_training.setup": { "total": 0.09180289995856583, "count": 1, "self": 0.09180289995856583 }, "TrainerController.start_learning": { "total": 153303.3877977, "count": 1, "self": 74.69348148081917, "children": { "TrainerController._reset_env": { "total": 7.401207100308966, "count": 250, "self": 7.401207100308966 }, "TrainerController.advance": { "total": 153221.13427531888, "count": 3460911, "self": 71.18974651861936, "children": { "env_step": { "total": 49880.210913505405, "count": 3460911, "self": 38018.34299747966, "children": { "SubprocessEnvManager._take_step": { "total": 11817.527951792814, "count": 3460911, "self": 378.1507700883085, "children": { "TorchPolicy.evaluate": { "total": 11439.377181704505, "count": 6279868, "self": 11439.377181704505 } } }, "workers": { "total": 44.33996423293138, "count": 3460911, "self": 0.0, "children": { "worker_root": { "total": 153206.0700721286, "count": 3460911, "is_parallel": true, "self": 123128.14828025631, "children": { "steps_from_proto": { "total": 0.3770651009399444, "count": 500, "is_parallel": true, "self": 0.07974869868485257, "children": { "_process_rank_one_or_two_observation": { "total": 0.2973164022550918, "count": 2000, "is_parallel": true, "self": 0.2973164022550918 } } }, "UnityEnvironment.step": { "total": 30077.54472677136, "count": 3460911, "is_parallel": true, "self": 1439.1714160880074, "children": { "UnityEnvironment._generate_step_input": { "total": 1116.3087238123408, "count": 3460911, "is_parallel": true, "self": 1116.3087238123408 }, "communicator.exchange": { "total": 22613.26101512561, "count": 3460911, "is_parallel": true, "self": 22613.26101512561 }, "steps_from_proto": { "total": 4908.803571745404, "count": 6921822, "is_parallel": true, "self": 1051.411938498437, "children": { "_process_rank_one_or_two_observation": { "total": 3857.391633246967, "count": 27687288, "is_parallel": true, "self": 3857.391633246967 } } } } } } } } } } }, "trainer_advance": { "total": 103269.73361529486, "count": 3460911, "self": 530.0413414256764, "children": { "process_trajectory": { "total": 12833.904439967591, "count": 3460911, "self": 12822.402508067782, "children": { 
"RLTrainer._checkpoint": { "total": 11.501931899809279, "count": 100, "self": 11.501931899809279 } } }, "_update_policy": { "total": 89905.78783390159, "count": 2424, "self": 6544.650359503983, "children": { "TorchPOCAOptimizer.update": { "total": 83361.1374743976, "count": 72720, "self": 83361.1374743976 } } } } } } }, "trainer_threads": { "total": 1.00000761449337e-06, "count": 1, "self": 1.00000761449337e-06 }, "TrainerController._save_models": { "total": 0.1588327999925241, "count": 1, "self": 0.03170330007560551, "children": { "RLTrainer._checkpoint": { "total": 0.12712949991691858, "count": 1, "self": 0.12712949991691858 } } } } } } }