{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.4872379302978516,
"min": 1.3691848516464233,
"max": 1.5180870294570923,
"count": 500
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 29126.068359375,
"min": 26924.76953125,
"max": 31682.85546875,
"count": 500
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 55.17204301075269,
"min": 43.629629629629626,
"max": 89.61290322580645,
"count": 500
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 20524.0,
"min": 16228.0,
"max": 22224.0,
"count": 500
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1616.4376838638466,
"min": 1579.196744553692,
"max": 1700.385513235949,
"count": 500
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 300657.4091986755,
"min": 172213.5726064411,
"max": 364738.29520057444,
"count": 500
},
"SoccerTwos.Step.mean": {
"value": 19999978.0,
"min": 15009988.0,
"max": 19999978.0,
"count": 500
},
"SoccerTwos.Step.sum": {
"value": 19999978.0,
"min": 15009988.0,
"max": 19999978.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.019980132579803467,
"min": -0.1321440488100052,
"max": 0.061585795134305954,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -5.054973602294922,
"min": -30.52527618408203,
"max": 15.7752046585083,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.02246411144733429,
"min": -0.13097290694713593,
"max": 0.061111632734537125,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.683420181274414,
"min": -30.254741668701172,
"max": 15.583466529846191,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.05270054082612734,
"min": -0.40565573778308806,
"max": 0.30312405130531217,
"count": 500
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 9.749600052833557,
"min": -54.468600153923035,
"max": 47.89360010623932,
"count": 500
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.05270054082612734,
"min": -0.40565573778308806,
"max": 0.30312405130531217,
"count": 500
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 9.749600052833557,
"min": -54.468600153923035,
"max": 47.89360010623932,
"count": 500
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 500
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.021534866566071287,
"min": 0.020611216948501827,
"max": 0.02840943965714097,
"count": 82
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.021534866566071287,
"min": 0.020611216948501827,
"max": 0.02840943965714097,
"count": 82
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09841666143324415,
"min": 0.08451716904923068,
"max": 0.10750022770489677,
"count": 82
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09841666143324415,
"min": 0.08451716904923068,
"max": 0.10750022770489677,
"count": 82
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09949219896126602,
"min": 0.08530456951614153,
"max": 0.10901509269566859,
"count": 82
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09949219896126602,
"min": 0.08530456951614153,
"max": 0.10901509269566859,
"count": 82
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 9.999999999999999e-05,
"min": 9.999999999999999e-05,
"max": 9.999999999999999e-05,
"count": 82
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 9.999999999999999e-05,
"min": 9.999999999999999e-05,
"max": 9.999999999999999e-05,
"count": 82
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.19999999999999998,
"min": 0.19999999999999998,
"max": 0.19999999999999998,
"count": 82
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.19999999999999998,
"min": 0.19999999999999998,
"max": 0.19999999999999998,
"count": 82
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005,
"min": 0.005,
"max": 0.005,
"count": 82
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005,
"min": 0.005,
"max": 0.005,
"count": 82
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1678088896",
"python_version": "3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "C:\\Users\\Alex\\.conda\\envs\\rl\\Scripts\\mlagents-learn config\\poca\\SoccerTwos-v4.yaml --env=training-envs-executables\\SoccerTwos\\SoccerTwos.exe --run-id SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.13.1+cpu",
"numpy_version": "1.21.2",
"end_time_seconds": "1678103026"
},
"total": 14129.332310592,
"count": 1,
"self": 1.1111714860016946,
"children": {
"run_training.setup": {
"total": 0.19717049600000003,
"count": 1,
"self": 0.19717049600000003
},
"TrainerController.start_learning": {
"total": 14128.02396861,
"count": 1,
"self": 9.5890246804247,
"children": {
"TrainerController._reset_env": {
"total": 4.5362276140002935,
"count": 26,
"self": 4.5362276140002935
},
"TrainerController.advance": {
"total": 14113.775025608575,
"count": 344478,
"self": 8.740378945574776,
"children": {
"env_step": {
"total": 6499.026367676286,
"count": 344478,
"self": 5181.30116057813,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1312.0876138652366,
"count": 344478,
"self": 45.67449664603737,
"children": {
"TorchPolicy.evaluate": {
"total": 1266.4131172191992,
"count": 626106,
"self": 1266.4131172191992
}
}
},
"workers": {
"total": 5.637593232919463,
"count": 344478,
"self": 0.0,
"children": {
"worker_root": {
"total": 14111.950555176158,
"count": 344478,
"is_parallel": true,
"self": 9999.969528788399,
"children": {
"steps_from_proto": {
"total": 0.05718324399976371,
"count": 52,
"is_parallel": true,
"self": 0.010959142993296034,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.04622410100646768,
"count": 208,
"is_parallel": true,
"self": 0.04622410100646768
}
}
},
"UnityEnvironment.step": {
"total": 4111.923843143759,
"count": 344478,
"is_parallel": true,
"self": 212.07101066499308,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 196.16795433004822,
"count": 344478,
"is_parallel": true,
"self": 196.16795433004822
},
"communicator.exchange": {
"total": 2925.141056170298,
"count": 344478,
"is_parallel": true,
"self": 2925.141056170298
},
"steps_from_proto": {
"total": 778.5438219784198,
"count": 688956,
"is_parallel": true,
"self": 151.49830487435952,
"children": {
"_process_rank_one_or_two_observation": {
"total": 627.0455171040603,
"count": 2755824,
"is_parallel": true,
"self": 627.0455171040603
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 7606.008278986715,
"count": 344478,
"self": 53.78237858206376,
"children": {
"process_trajectory": {
"total": 1932.6900048486557,
"count": 344478,
"self": 1931.339307254654,
"children": {
"RLTrainer._checkpoint": {
"total": 1.3506975940017583,
"count": 10,
"self": 1.3506975940017583
}
}
},
"_update_policy": {
"total": 5619.535895555995,
"count": 82,
"self": 646.1680682530086,
"children": {
"TorchPOCAOptimizer.update": {
"total": 4973.367827302986,
"count": 9676,
"self": 4973.367827302986
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.049999789567664e-07,
"count": 1,
"self": 9.049999789567664e-07
},
"TrainerController._save_models": {
"total": 0.12368980199971702,
"count": 1,
"self": 0.0024554240008001216,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1212343779989169,
"count": 1,
"self": 0.1212343779989169
}
}
}
}
}
}
}