adrian-nf's picture
Agent training resumed from the checkpoint.
d7f9729 verified
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.9888639450073242,
"min": 1.9591001272201538,
"max": 2.304722309112549,
"count": 200
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 38822.625,
"min": 35067.953125,
"max": 48122.08203125,
"count": 200
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 64.16883116883118,
"min": 39.94565217391305,
"max": 90.54545454545455,
"count": 200
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19764.0,
"min": 14700.0,
"max": 20632.0,
"count": 200
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1502.0272002853012,
"min": 1451.8758228042543,
"max": 1541.9519563270803,
"count": 200
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 231312.18884393637,
"min": 167406.66922170314,
"max": 357247.66199047794,
"count": 200
},
"SoccerTwos.Step.mean": {
"value": 4999977.0,
"min": 3009994.0,
"max": 4999977.0,
"count": 200
},
"SoccerTwos.Step.sum": {
"value": 4999977.0,
"min": 3009994.0,
"max": 4999977.0,
"count": 200
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.021213339641690254,
"min": -0.11425300687551498,
"max": 0.12809213995933533,
"count": 200
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -3.2668542861938477,
"min": -19.80826759338379,
"max": 27.155534744262695,
"count": 200
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.022786105051636696,
"min": -0.1104229986667633,
"max": 0.12847450375556946,
"count": 200
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -3.5090601444244385,
"min": -19.60022735595703,
"max": 27.236595153808594,
"count": 200
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 200
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 200
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.015171430714718707,
"min": -0.2551724847662386,
"max": 0.33634556886516037,
"count": 200
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -2.336400330066681,
"min": -48.22759962081909,
"max": 54.64479982852936,
"count": 200
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.015171430714718707,
"min": -0.2551724847662386,
"max": 0.33634556886516037,
"count": 200
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -2.336400330066681,
"min": -48.22759962081909,
"max": 54.64479982852936,
"count": 200
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.020496907197715093,
"min": 0.012350564827405227,
"max": 0.02300609917535136,
"count": 97
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.020496907197715093,
"min": 0.012350564827405227,
"max": 0.02300609917535136,
"count": 97
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09670711110035578,
"min": 0.08462856933474541,
"max": 0.12271908149123192,
"count": 97
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09670711110035578,
"min": 0.08462856933474541,
"max": 0.12271908149123192,
"count": 97
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.0974824791153272,
"min": 0.08525566384196281,
"max": 0.12522547418872515,
"count": 97
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.0974824791153272,
"min": 0.08525566384196281,
"max": 0.12522547418872515,
"count": 97
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 97
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 97
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 97
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 97
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 97
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 97
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1734616699",
"python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./results/SoccerTwosNew/configuration.yaml --env=train-soccer/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwosNew --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.5.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1734629293"
},
"total": 12593.300437904,
"count": 1,
"self": 0.6350089809984638,
"children": {
"run_training.setup": {
"total": 0.07823170700009996,
"count": 1,
"self": 0.07823170700009996
},
"TrainerController.start_learning": {
"total": 12592.587197216,
"count": 1,
"self": 5.7534607178804436,
"children": {
"TrainerController._reset_env": {
"total": 3.5695269119986506,
"count": 11,
"self": 3.5695269119986506
},
"TrainerController.advance": {
"total": 12583.022597385121,
"count": 139672,
"self": 6.206227469037913,
"children": {
"env_step": {
"total": 4620.27174839786,
"count": 139672,
"self": 3706.6858647516747,
"children": {
"SubprocessEnvManager._take_step": {
"total": 910.3362662619375,
"count": 139672,
"self": 34.994419181102444,
"children": {
"TorchPolicy.evaluate": {
"total": 875.341847080835,
"count": 250400,
"self": 875.341847080835
}
}
},
"workers": {
"total": 3.249617384248154,
"count": 139672,
"self": 0.0,
"children": {
"worker_root": {
"total": 12576.20785117312,
"count": 139672,
"is_parallel": true,
"self": 9492.413004122256,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.005709210000077292,
"count": 2,
"is_parallel": true,
"self": 0.001730845999873054,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0039783640002042375,
"count": 8,
"is_parallel": true,
"self": 0.0039783640002042375
}
}
},
"UnityEnvironment.step": {
"total": 0.0770735109999805,
"count": 1,
"is_parallel": true,
"self": 0.0014611759999070273,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0008674870000504598,
"count": 1,
"is_parallel": true,
"self": 0.0008674870000504598
},
"communicator.exchange": {
"total": 0.07061322499998823,
"count": 1,
"is_parallel": true,
"self": 0.07061322499998823
},
"steps_from_proto": {
"total": 0.004131623000034779,
"count": 2,
"is_parallel": true,
"self": 0.0007733510001344257,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.003358271999900353,
"count": 8,
"is_parallel": true,
"self": 0.003358271999900353
}
}
}
}
}
}
},
"steps_from_proto": {
"total": 0.028956288003655573,
"count": 20,
"is_parallel": true,
"self": 0.005744176001599044,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.02321211200205653,
"count": 80,
"is_parallel": true,
"self": 0.02321211200205653
}
}
},
"UnityEnvironment.step": {
"total": 3083.7658907628593,
"count": 139671,
"is_parallel": true,
"self": 189.4898898952615,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 120.75665496690567,
"count": 139671,
"is_parallel": true,
"self": 120.75665496690567
},
"communicator.exchange": {
"total": 2195.0722508216804,
"count": 139671,
"is_parallel": true,
"self": 2195.0722508216804
},
"steps_from_proto": {
"total": 578.4470950790119,
"count": 279342,
"is_parallel": true,
"self": 102.38880514415735,
"children": {
"_process_rank_one_or_two_observation": {
"total": 476.05828993485454,
"count": 1117368,
"is_parallel": true,
"self": 476.05828993485454
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 7956.544621518223,
"count": 139672,
"self": 40.048893840202254,
"children": {
"process_trajectory": {
"total": 1281.5025450610324,
"count": 139672,
"self": 1280.6019619390331,
"children": {
"RLTrainer._checkpoint": {
"total": 0.9005831219992615,
"count": 4,
"self": 0.9005831219992615
}
}
},
"_update_policy": {
"total": 6634.993182616989,
"count": 97,
"self": 412.37974876304725,
"children": {
"TorchPOCAOptimizer.update": {
"total": 6222.613433853941,
"count": 2910,
"self": 6222.613433853941
}
}
}
}
}
}
},
"trainer_threads": {
"total": 2.034001227002591e-06,
"count": 1,
"self": 2.034001227002591e-06
},
"TrainerController._save_models": {
"total": 0.24161016699872562,
"count": 1,
"self": 0.004555736997644999,
"children": {
"RLTrainer._checkpoint": {
"total": 0.23705443000108062,
"count": 1,
"self": 0.23705443000108062
}
}
}
}
}
}
}