{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.6732162237167358, "min": 1.666317105293274, "max": 1.7313969135284424, "count": 12 }, "SoccerTwos.Policy.Entropy.sum": { "value": 33410.78125, "min": 23942.72265625, "max": 36793.984375, "count": 12 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 68.8, "min": 49.515625, "max": 75.49230769230769, "count": 12 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19264.0, "min": 12676.0, "max": 20368.0, "count": 12 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1480.1077948820182, "min": 1476.8847003967742, "max": 1486.7503347680472, "count": 12 }, "SoccerTwos.Self-play.ELO.sum": { "value": 207215.09128348256, "min": 189736.15258187955, "max": 266509.04672182666, "count": 12 }, "SoccerTwos.Step.mean": { "value": 12999968.0, "min": 12889988.0, "max": 12999968.0, "count": 12 }, "SoccerTwos.Step.sum": { "value": 12999968.0, "min": 12889988.0, "max": 12999968.0, "count": 12 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.014512903057038784, "min": -0.06504756957292557, "max": 0.023813463747501373, "count": 12 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 2.031806468963623, "min": -9.23675537109375, "max": 3.1671907901763916, "count": 12 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.013964519836008549, "min": -0.0684206411242485, "max": 0.025863736867904663, "count": 12 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 1.9550328254699707, "min": -9.715730667114258, "max": 3.4398770332336426, "count": 12 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 12 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 12 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.006099998099463326, "min": -0.20570000012715658, "max": 0.14858947302165784, "count": 12 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -0.8539997339248657, "min": -37.026000022888184, "max": 19.762399911880493, "count": 12 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.006099998099463326, "min": -0.20570000012715658, "max": 0.14858947302165784, "count": 12 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -0.8539997339248657, "min": -37.026000022888184, "max": 19.762399911880493, "count": 12 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 12 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 12 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01811842938477639, "min": 0.01811842938477639, "max": 0.020872831674447905, "count": 5 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01811842938477639, "min": 0.01811842938477639, "max": 0.020872831674447905, "count": 5 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10361534506082534, "min": 0.09603980109095574, "max": 0.10875900462269783, "count": 5 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10361534506082534, "min": 0.09603980109095574, "max": 0.10875900462269783, "count": 5 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10443070158362389, "min": 0.09731108844280242, "max": 0.1096098634103934, "count": 5 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10443070158362389, "min": 0.09731108844280242, "max": 0.1096098634103934, "count": 5 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 5 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 5 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 5 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 5 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 5 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 5 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1711908962", "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr 5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\E:\\Python\\huggingface-courses\\DeepRL\\.venv\\Scripts\\mlagents-learn ./ml-agents/config/poca/SoccerTwos.yaml --env=./ml-agents/training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --torch-device=cuda --no-graphics --resume", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.2.2+cu121", "numpy_version": "1.23.5", "end_time_seconds": "1711909189" }, "total": 227.18309600000066, "count": 1, "self": 0.25061380000079225, "children": { "run_training.setup": { "total": 0.06536020000021381, "count": 1, "self": 0.06536020000021381 }, "TrainerController.start_learning": { "total": 226.86712199999965, "count": 1, "self": 0.15327689998230198, "children": { "TrainerController._reset_env": { "total": 23.67103490000045, "count": 2, "self": 23.67103490000045 }, "TrainerController.advance": { "total": 202.8773993000159, "count": 8062, "self": 0.16181120012879546, "children": { "env_step": { "total": 148.72277669992036, "count": 8062, "self": 87.11682179993477, "children": { "SubprocessEnvManager._take_step": { "total": 61.510758399970655, "count": 8062, "self": 1.0319898999423458, "children": { "TorchPolicy.evaluate": { "total": 60.47876850002831, "count": 14686, "self": 60.47876850002831 } } }, "workers": { "total": 0.09519650001493574, "count": 8062, "self": 0.0, "children": { "worker_root": { "total": 203.37705950000236, "count": 8062, "is_parallel": true, "self": 133.08814110002822, "children": { "steps_from_proto": { "total": 0.0030380999996850733, "count": 4, "is_parallel": true, "self": 0.0006178999956318876, "children": { "_process_rank_one_or_two_observation": { "total": 0.0024202000040531857, "count": 16, "is_parallel": true, "self": 0.0024202000040531857 } } }, "UnityEnvironment.step": { "total": 70.28588029997445, "count": 8062, "is_parallel": true, "self": 3.5598539999791683, "children": { "UnityEnvironment._generate_step_input": { "total": 2.555393599981471, "count": 8062, "is_parallel": true, "self": 2.555393599981471 }, "communicator.exchange": { "total": 53.147821099950306, "count": 8062, "is_parallel": true, "self": 53.147821099950306 }, "steps_from_proto": { "total": 11.022811600063505, "count": 16124, "is_parallel": true, "self": 2.168546100063395, "children": { "_process_rank_one_or_two_observation": { "total": 8.85426550000011, "count": 64496, "is_parallel": true, "self": 8.85426550000011 } } } } } } } } } } }, "trainer_advance": { "total": 53.99281139996674, "count": 8062, "self": 1.064940399983243, "children": { "process_trajectory": { "total": 29.09158649998426, "count": 8062, "self": 28.879398999983096, "children": { "RLTrainer._checkpoint": { "total": 0.21218750000116415, "count": 1, "self": 0.21218750000116415 } } }, "_update_policy": { "total": 23.836284499999238, "count": 5, "self": 11.238632699996742, "children": { "TorchPOCAOptimizer.update": { "total": 12.597651800002495, "count": 150, "self": 12.597651800002495 } } } } } } }, "trainer_threads": { "total": 6.999998731771484e-07, "count": 1, "self": 6.999998731771484e-07 }, "TrainerController._save_models": { "total": 0.1654102000011335, "count": 1, "self": 0.001551100000142469, "children": { "RLTrainer._checkpoint": { "total": 0.16385910000099102, "count": 1, "self": 0.16385910000099102 } } } } } } }