{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 0.5860754251480103,
"min": 0.5552442669868469,
"max": 1.5035649538040161,
"count": 7547
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 11665.2451171875,
"min": 9538.4326171875,
"max": 36082.8828125,
"count": 7547
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 51.84782608695652,
"min": 44.51376146788991,
"max": 102.08,
"count": 7547
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19080.0,
"min": 14576.0,
"max": 21716.0,
"count": 7547
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 2020.0622900649475,
"min": 1811.0887088074808,
"max": 2064.0528990523876,
"count": 7547
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 371691.46137195034,
"min": 182914.6332417105,
"max": 442594.2553746345,
"count": 7547
},
"SoccerTwos.Step.mean": {
"value": 99999997.0,
"min": 24539921.0,
"max": 99999997.0,
"count": 7547
},
"SoccerTwos.Step.sum": {
"value": 99999997.0,
"min": 24539921.0,
"max": 99999997.0,
"count": 7547
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.03841900825500488,
"min": -0.12398265302181244,
"max": 0.08097296208143234,
"count": 7547
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -7.107516288757324,
"min": -21.696964263916016,
"max": 13.198593139648438,
"count": 7547
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.038362208753824234,
"min": -0.12447786331176758,
"max": 0.08377066999673843,
"count": 7547
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -7.09700870513916,
"min": -21.783626556396484,
"max": 13.654619216918945,
"count": 7547
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 7547
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 7547
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.01118270190986427,
"min": -0.432298111840614,
"max": 0.39425536602903893,
"count": 7547
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -2.06879985332489,
"min": -68.73539978265762,
"max": 69.78319978713989,
"count": 7547
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.01118270190986427,
"min": -0.432298111840614,
"max": 0.39425536602903893,
"count": 7547
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -2.06879985332489,
"min": -68.73539978265762,
"max": 69.78319978713989,
"count": 7547
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 7547
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 7547
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01313792565924814,
"min": 0.006748788067003867,
"max": 0.018132517344201914,
"count": 1837
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01313792565924814,
"min": 0.006748788067003867,
"max": 0.018132517344201914,
"count": 1837
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.10392129793763161,
"min": 0.0829622300962607,
"max": 0.12137279411156972,
"count": 1837
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.10392129793763161,
"min": 0.0829622300962607,
"max": 0.12137279411156972,
"count": 1837
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10414750551184018,
"min": 0.08385376607378324,
"max": 0.12209492151935895,
"count": 1837
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10414750551184018,
"min": 0.08385376607378324,
"max": 0.12209492151935895,
"count": 1837
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 6.48999787000056e-09,
"min": 6.48999787000056e-09,
"max": 0.000226278738573762,
"count": 1837
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 6.48999787000056e-09,
"min": 6.48999787000056e-09,
"max": 0.000226278738573762,
"count": 1837
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.10000213000000001,
"min": 0.10000213000000001,
"max": 0.175426238,
"count": 1837
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.10000213000000001,
"min": 0.10000213000000001,
"max": 0.175426238,
"count": 1837
},
"SoccerTwos.Policy.Beta.mean": {
"value": 1.010628700000001e-05,
"min": 1.010628700000001e-05,
"max": 0.0037737692762000014,
"count": 1837
},
"SoccerTwos.Policy.Beta.sum": {
"value": 1.010628700000001e-05,
"min": 1.010628700000001e-05,
"max": 0.0037737692762000014,
"count": 1837
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1711283121",
"python_version": "3.10.1 (main, Mar 20 2024, 23:11:47) [Clang 15.0.0 (clang-1500.3.9.4)]",
"command_line_arguments": "/Users/paul/.pyenv/versions/venv_unit5_hf/bin/mlagents-learn ./config/poca/SoccerTwos00.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos00 --no-graphics --resume",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.1",
"numpy_version": "1.23.5",
"end_time_seconds": "1711528600"
},
"total": 245477.12733966703,
"count": 1,
"self": 0.24398920801468194,
"children": {
"run_training.setup": {
"total": 0.012441333994502202,
"count": 1,
"self": 0.012441333994502202
},
"TrainerController.start_learning": {
"total": 245476.87090912502,
"count": 1,
"self": 48.019182801566785,
"children": {
"TrainerController._reset_env": {
"total": 9.865059326170012,
"count": 203,
"self": 9.865059326170012
},
"TrainerController.advance": {
"total": 245418.89493316427,
"count": 5195544,
"self": 41.95866535537061,
"children": {
"env_step": {
"total": 200737.06311002214,
"count": 5195544,
"self": 194810.4156835102,
"children": {
"SubprocessEnvManager._take_step": {
"total": 5897.157632466056,
"count": 5195544,
"self": 193.83796419762075,
"children": {
"TorchPolicy.evaluate": {
"total": 5703.319668268436,
"count": 9453636,
"self": 5703.319668268436
}
}
},
"workers": {
"total": 29.48979404589045,
"count": 5195544,
"self": 0.0,
"children": {
"worker_root": {
"total": 245409.40406282153,
"count": 5195544,
"is_parallel": true,
"self": 56015.21461811659,
"children": {
"steps_from_proto": {
"total": 0.2706993741740007,
"count": 406,
"is_parallel": true,
"self": 0.03438532294239849,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.2363140512316022,
"count": 1624,
"is_parallel": true,
"self": 0.2363140512316022
}
}
},
"UnityEnvironment.step": {
"total": 189393.91874533077,
"count": 5195544,
"is_parallel": true,
"self": 480.95873745632707,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 3459.8037157750805,
"count": 5195544,
"is_parallel": true,
"self": 3459.8037157750805
},
"communicator.exchange": {
"total": 179190.89349023328,
"count": 5195544,
"is_parallel": true,
"self": 179190.89349023328
},
"steps_from_proto": {
"total": 6262.262801866076,
"count": 10391088,
"is_parallel": true,
"self": 769.0748052389536,
"children": {
"_process_rank_one_or_two_observation": {
"total": 5493.187996627123,
"count": 41564352,
"is_parallel": true,
"self": 5493.187996627123
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 44639.87315778676,
"count": 5195544,
"self": 364.8762370394834,
"children": {
"process_trajectory": {
"total": 7801.286456701375,
"count": 5195544,
"self": 7787.036999287549,
"children": {
"RLTrainer._checkpoint": {
"total": 14.24945741382544,
"count": 151,
"self": 14.24945741382544
}
}
},
"_update_policy": {
"total": 36473.7104640459,
"count": 1837,
"self": 4713.920209994394,
"children": {
"TorchPOCAOptimizer.update": {
"total": 31759.790254051506,
"count": 55110,
"self": 31759.790254051506
}
}
}
}
}
}
},
"trainer_threads": {
"total": 3.3300602808594704e-07,
"count": 1,
"self": 3.3300602808594704e-07
},
"TrainerController._save_models": {
"total": 0.0917335000121966,
"count": 1,
"self": 0.0011219169828109443,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09061158302938566,
"count": 1,
"self": 0.09061158302938566
}
}
}
}
}
}
}