{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.0388121604919434, "min": 1.945584774017334, "max": 3.2957568168640137, "count": 643 }, "SoccerTwos.Policy.Entropy.sum": { "value": 40319.55078125, "min": 9175.3837890625, "max": 110009.140625, "count": 643 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 56.50574712643678, "min": 44.0, "max": 999.0, "count": 643 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19664.0, "min": 14784.0, "max": 27752.0, "count": 643 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1576.0100270604278, "min": 1182.4108714575964, "max": 1604.9742953790003, "count": 629 }, "SoccerTwos.Self-play.ELO.sum": { "value": 274225.7447085144, "min": 2370.7237479049486, "max": 348628.63997655653, "count": 629 }, "SoccerTwos.Step.mean": { "value": 6429984.0, "min": 9244.0, "max": 6429984.0, "count": 643 }, "SoccerTwos.Step.sum": { "value": 6429984.0, "min": 9244.0, "max": 6429984.0, "count": 643 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.01336890272796154, "min": -0.1304987668991089, "max": 0.1746169626712799, "count": 643 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 2.3261890411376953, "min": -19.835813522338867, "max": 33.177223205566406, "count": 643 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.019231462851166725, "min": -0.13112477958202362, "max": 0.1667526811361313, "count": 643 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 3.3462743759155273, "min": -19.930965423583984, "max": 32.000701904296875, "count": 643 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 643 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 643 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.015324138361832192, "min": -0.5392210546292757, "max": 0.3691333289499636, "count": 643 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -2.6664000749588013, "min": -58.37399983406067, "max": 54.21820014715195, "count": 643 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.015324138361832192, "min": -0.5392210546292757, "max": 0.3691333289499636, "count": 643 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -2.6664000749588013, "min": -58.37399983406067, "max": 54.21820014715195, "count": 643 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 643 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 643 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01760386994186168, "min": 0.011585256479641733, "max": 0.026990281394682826, "count": 308 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01760386994186168, "min": 0.011585256479641733, "max": 0.026990281394682826, "count": 308 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.11058514540394147, "min": 7.689539776644476e-05, "max": 0.11738228897253672, "count": 308 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.11058514540394147, "min": 7.689539776644476e-05, "max": 0.11738228897253672, "count": 308 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11281115661064783, "min": 7.77535743812526e-05, "max": 0.12120520348350207, "count": 308 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11281115661064783, "min": 7.77535743812526e-05, "max": 0.12120520348350207, "count": 308 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 308 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 308 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 308 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.19999999999999996, "max": 0.20000000000000007, "count": 308 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 308 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005, "max": 0.005000000000000001, "count": 308 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1737565781", "python_version": "3.10.12 (main, Jul 5 2023, 15:02:25) [Clang 14.0.6 ]", "command_line_arguments": "/opt/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.5.1", "numpy_version": "1.23.5", "end_time_seconds": "1737586666" }, "total": 20883.988616667004, "count": 1, "self": 0.2032090000138851, "children": { "run_training.setup": { "total": 0.016666458002873696, "count": 1, "self": 0.016666458002873696 }, "TrainerController.start_learning": { "total": 20883.768741208987, "count": 1, "self": 3.51508834354172, "children": { "TrainerController._reset_env": { "total": 4.790670747999684, "count": 33, "self": 4.790670747999684 }, "TrainerController.advance": { "total": 20875.367187783442, "count": 436500, "self": 3.1530161247355863, "children": { "env_step": { "total": 16717.00499983302, "count": 436500, "self": 16122.510443662133, "children": { "SubprocessEnvManager._take_step": { "total": 591.967685976997, "count": 436500, "self": 17.01103952426638, "children": { "TorchPolicy.evaluate": { "total": 574.9566464527306, "count": 812812, "self": 574.9566464527306 } } }, "workers": { "total": 2.5268701938912272, "count": 436499, "self": 0.0, "children": { "worker_root": { "total": 20873.723298654702, "count": 436499, "is_parallel": true, "self": 5266.90333614593, "children": { "steps_from_proto": { "total": 0.04262995801400393, "count": 66, "is_parallel": true, "self": 0.005426217016065493, "children": { "_process_rank_one_or_two_observation": { "total": 0.03720374099793844, "count": 264, "is_parallel": true, "self": 0.03720374099793844 } } }, "UnityEnvironment.step": { "total": 15606.777332550759, "count": 436499, "is_parallel": true, "self": 43.03211950631521, "children": { "UnityEnvironment._generate_step_input": { "total": 273.32911062698986, "count": 436499, "is_parallel": true, "self": 273.32911062698986 }, "communicator.exchange": { "total": 14753.169074985432, "count": 436499, "is_parallel": true, "self": 14753.169074985432 }, "steps_from_proto": { "total": 537.2470274320222, "count": 872998, "is_parallel": true, "self": 62.450076708730194, "children": { "_process_rank_one_or_two_observation": { "total": 474.79695072329196, "count": 3491992, "is_parallel": true, "self": 474.79695072329196 } } } } } } } } } } }, "trainer_advance": { "total": 4155.209171825685, "count": 436499, "self": 37.53154071350582, "children": { "process_trajectory": { "total": 814.5478735752113, "count": 436499, "self": 812.60779490821, "children": { "RLTrainer._checkpoint": { "total": 1.9400786670012167, "count": 12, "self": 1.9400786670012167 } } }, "_update_policy": { "total": 3303.129757536968, "count": 309, "self": 375.33577479214, "children": { "TorchPOCAOptimizer.update": { "total": 2927.793982744828, "count": 9282, "self": 2927.793982744828 } } } } } } }, "trainer_threads": { "total": 8.33999365568161e-07, "count": 1, "self": 8.33999365568161e-07 }, "TrainerController._save_models": { "total": 0.09579350000421982, "count": 1, "self": 0.0005308749969117343, "children": { "RLTrainer._checkpoint": { "total": 0.09526262500730809, "count": 1, "self": 0.09526262500730809 } } } } } } }