{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.383643865585327, "min": 2.3348634243011475, "max": 3.295682668685913, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 44240.4296875, "min": 22492.93359375, "max": 109043.3515625, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 52.630434782608695, "min": 49.255102040816325, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19368.0, "min": 16244.0, "max": 25456.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1562.4070536251486, "min": 1190.3704049646515, "max": 1580.6263638554033, "count": 459 }, "SoccerTwos.Self-play.ELO.sum": { "value": 287482.8978670273, "min": 2380.740809929303, "max": 307364.83690598514, "count": 459 }, "SoccerTwos.Step.mean": { "value": 4999984.0, "min": 9744.0, "max": 4999984.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999984.0, "min": 9744.0, "max": 4999984.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.0013122053351253271, "min": -0.09658608585596085, "max": 0.21021796762943268, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -0.24144577980041504, "min": -15.840118408203125, "max": 26.250734329223633, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.00833477545529604, "min": -0.09836539626121521, "max": 0.20989856123924255, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -1.5335986614227295, "min": -16.131925582885742, "max": 25.586517333984375, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.005510870529257733, "min": -0.6272235267302569, "max": 0.5677454547448592, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -1.0140001773834229, "min": -57.839200019836426, "max": 62.45200002193451, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.005510870529257733, "min": -0.6272235267302569, "max": 0.5677454547448592, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -1.0140001773834229, "min": -57.839200019836426, "max": 62.45200002193451, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.013803280974389053, "min": 0.01127692673180718, "max": 0.023302046527775625, "count": 239 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.013803280974389053, "min": 0.01127692673180718, "max": 0.023302046527775625, "count": 239 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.14569955815871558, "min": 1.2603901647404806e-05, "max": 0.15602067162593206, "count": 239 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.14569955815871558, "min": 1.2603901647404806e-05, "max": 0.15602067162593206, "count": 239 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.14723237057526906, "min": 1.1774382680111254e-05, "max": 0.1580161690711975, "count": 239 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.14723237057526906, "min": 1.1774382680111254e-05, "max": 0.1580161690711975, "count": 239 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 239 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 239 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 239 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 239 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.007000000000000002, "min": 0.007000000000000002, "max": 0.007000000000000002, "count": 239 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.007000000000000002, "min": 0.007000000000000002, "max": 0.007000000000000002, "count": 239 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1726643592", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\User\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.4.1+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1726652508" }, "total": 8916.445450400002, "count": 1, "self": 0.16409519995795563, "children": { "run_training.setup": { "total": 0.059361300023738295, "count": 1, "self": 0.059361300023738295 }, "TrainerController.start_learning": { "total": 8916.22199390002, "count": 1, "self": 5.2189109225873835, "children": { "TrainerController._reset_env": { "total": 3.8198256999603473, "count": 25, "self": 3.8198256999603473 }, "TrainerController.advance": { "total": 8907.104790277488, "count": 335402, "self": 5.323563448386267, "children": { "env_step": { "total": 3598.9406019230373, "count": 335402, "self": 2680.055818712106, "children": { "SubprocessEnvManager._take_step": { "total": 915.6580763242673, "count": 335402, "self": 31.165901128493715, "children": { "TorchPolicy.evaluate": { "total": 884.4921751957736, "count": 634234, "self": 884.4921751957736 } } }, "workers": { "total": 3.226706886664033, "count": 335402, "self": 0.0, "children": { "worker_root": { "total": 8906.297773706261, "count": 335402, "is_parallel": true, "self": 6817.630118792469, "children": { "steps_from_proto": { "total": 0.024310700013302267, "count": 50, "is_parallel": true, "self": 0.004851500445511192, "children": { "_process_rank_one_or_two_observation": { "total": 0.019459199567791075, "count": 200, "is_parallel": true, "self": 0.019459199567791075 } } }, "UnityEnvironment.step": { "total": 2088.6433442137786, "count": 335402, "is_parallel": true, "self": 108.99790160538396, "children": { "UnityEnvironment._generate_step_input": { "total": 88.95755818899488, "count": 335402, "is_parallel": true, "self": 88.95755818899488 }, "communicator.exchange": { "total": 1542.3231714186259, "count": 335402, "is_parallel": true, "self": 1542.3231714186259 }, "steps_from_proto": { "total": 348.3647130007739, "count": 670804, "is_parallel": true, "self": 72.32217171305092, "children": { "_process_rank_one_or_two_observation": { "total": 276.042541287723, "count": 2683216, "is_parallel": true, "self": 276.042541287723 } } } } } } } } } } }, "trainer_advance": { "total": 5302.8406249060645, "count": 335402, "self": 40.12698630784871, "children": { "process_trajectory": { "total": 796.026908398082, "count": 335402, "self": 795.2306884980644, "children": { "RLTrainer._checkpoint": { "total": 0.7962199000176042, "count": 10, "self": 0.7962199000176042 } } }, "_update_policy": { "total": 4466.686730200134, "count": 239, "self": 518.8003187987488, "children": { "TorchPOCAOptimizer.update": { "total": 3947.886411401385, "count": 7170, "self": 3947.886411401385 } } } } } } }, "trainer_threads": { "total": 6.00004568696022e-07, "count": 1, "self": 6.00004568696022e-07 }, "TrainerController._save_models": { "total": 0.0784663999802433, "count": 1, "self": 0.008741099969483912, "children": { "RLTrainer._checkpoint": { "total": 0.06972530001075938, "count": 1, "self": 0.06972530001075938 } } } } } } }