{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.7180193662643433,
"min": 1.7169474363327026,
"max": 3.2957441806793213,
"count": 787
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 34690.24609375,
"min": 16131.07421875,
"max": 140922.515625,
"count": 787
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 66.06666666666666,
"min": 41.61538461538461,
"max": 999.0,
"count": 787
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19820.0,
"min": 13584.0,
"max": 28736.0,
"count": 787
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1625.8410519662657,
"min": 1185.8544299297316,
"max": 1631.4416858275144,
"count": 750
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 243876.15779493985,
"min": 2373.431629846521,
"max": 358590.00508752686,
"count": 750
},
"SoccerTwos.Step.mean": {
"value": 7869994.0,
"min": 9384.0,
"max": 7869994.0,
"count": 787
},
"SoccerTwos.Step.sum": {
"value": 7869994.0,
"min": 9384.0,
"max": 7869994.0,
"count": 787
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.009102094918489456,
"min": -0.11238310486078262,
"max": 0.17347067594528198,
"count": 787
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 1.365314245223999,
"min": -20.116575241088867,
"max": 21.37743377685547,
"count": 787
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.0034246540162712336,
"min": -0.10482959449291229,
"max": 0.17781499028205872,
"count": 787
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 0.5136981010437012,
"min": -19.634157180786133,
"max": 21.74544334411621,
"count": 787
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 787
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 787
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.011397332350413004,
"min": -0.7266000015395028,
"max": 0.4861166576544444,
"count": 787
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -1.7095998525619507,
"min": -49.8672000169754,
"max": 56.26180016994476,
"count": 787
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.011397332350413004,
"min": -0.7266000015395028,
"max": 0.4861166576544444,
"count": 787
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -1.7095998525619507,
"min": -49.8672000169754,
"max": 56.26180016994476,
"count": 787
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 787
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 787
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01732818890401783,
"min": 0.009920780318013082,
"max": 0.0233591943862848,
"count": 377
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01732818890401783,
"min": 0.009920780318013082,
"max": 0.0233591943862848,
"count": 377
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09881145680944124,
"min": 1.6208294994157769e-06,
"max": 0.11842111423611641,
"count": 377
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09881145680944124,
"min": 1.6208294994157769e-06,
"max": 0.11842111423611641,
"count": 377
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10014958679676056,
"min": 1.6126407217598172e-06,
"max": 0.12048137858510018,
"count": 377
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10014958679676056,
"min": 1.6126407217598172e-06,
"max": 0.12048137858510018,
"count": 377
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 377
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 377
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.2,
"max": 0.20000000000000007,
"count": 377
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.2,
"max": 0.20000000000000007,
"count": 377
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005,
"max": 0.005000000000000001,
"count": 377
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005,
"max": 0.005000000000000001,
"count": 377
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1679698174",
"python_version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]",
"command_line_arguments": "/home/yibo/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos --run-id=SoccerTwos --no-graphics --force",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.10.2+cu111",
"numpy_version": "1.21.2",
"end_time_seconds": "1679729109"
},
"total": 30935.237442568,
"count": 1,
"self": 0.03809124100371264,
"children": {
"run_training.setup": {
"total": 0.010129379999852972,
"count": 1,
"self": 0.010129379999852972
},
"TrainerController.start_learning": {
"total": 30935.189221946996,
"count": 1,
"self": 8.493996333803807,
"children": {
"TrainerController._reset_env": {
"total": 5.343945046992303,
"count": 40,
"self": 5.343945046992303
},
"TrainerController.advance": {
"total": 30921.22087009319,
"count": 535380,
"self": 7.671441275608231,
"children": {
"env_step": {
"total": 28878.602780573216,
"count": 535380,
"self": 27466.31712422939,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1407.5150980348299,
"count": 535380,
"self": 45.01167038467065,
"children": {
"TorchPolicy.evaluate": {
"total": 1362.5034276501592,
"count": 995452,
"self": 1362.5034276501592
}
}
},
"workers": {
"total": 4.770558308993714,
"count": 535379,
"self": 0.0,
"children": {
"worker_root": {
"total": 30923.658859852934,
"count": 535379,
"is_parallel": true,
"self": 4330.540863548518,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001981433999389992,
"count": 2,
"is_parallel": true,
"self": 0.00038393999784602784,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0015974940015439643,
"count": 8,
"is_parallel": true,
"self": 0.0015974940015439643
}
}
},
"UnityEnvironment.step": {
"total": 0.05770361100076116,
"count": 1,
"is_parallel": true,
"self": 0.00013167900215194095,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0009889229986583814,
"count": 1,
"is_parallel": true,
"self": 0.0009889229986583814
},
"communicator.exchange": {
"total": 0.054780389998995815,
"count": 1,
"is_parallel": true,
"self": 0.054780389998995815
},
"steps_from_proto": {
"total": 0.0018026190009550191,
"count": 2,
"is_parallel": true,
"self": 0.0002745570018305443,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0015280619991244748,
"count": 8,
"is_parallel": true,
"self": 0.0015280619991244748
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 26593.053648012406,
"count": 535378,
"is_parallel": true,
"self": 68.33193406206192,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 508.81990166595824,
"count": 535378,
"is_parallel": true,
"self": 508.81990166595824
},
"communicator.exchange": {
"total": 25099.028850086477,
"count": 535378,
"is_parallel": true,
"self": 25099.028850086477
},
"steps_from_proto": {
"total": 916.872962197911,
"count": 1070756,
"is_parallel": true,
"self": 125.42700791917923,
"children": {
"_process_rank_one_or_two_observation": {
"total": 791.4459542787317,
"count": 4283024,
"is_parallel": true,
"self": 791.4459542787317
}
}
}
}
},
"steps_from_proto": {
"total": 0.06434829200952663,
"count": 78,
"is_parallel": true,
"self": 0.009044202002769453,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.05530409000675718,
"count": 312,
"is_parallel": true,
"self": 0.05530409000675718
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 2034.9466482443659,
"count": 535379,
"self": 54.565092668593934,
"children": {
"process_trajectory": {
"total": 698.4577447428019,
"count": 535379,
"self": 696.5146503238138,
"children": {
"RLTrainer._checkpoint": {
"total": 1.9430944189880393,
"count": 15,
"self": 1.9430944189880393
}
}
},
"_update_policy": {
"total": 1281.92381083297,
"count": 378,
"self": 799.7250728169147,
"children": {
"TorchPOCAOptimizer.update": {
"total": 482.19873801605536,
"count": 11343,
"self": 482.19873801605536
}
}
}
}
}
}
},
"trainer_threads": {
"total": 7.730050128884614e-07,
"count": 1,
"self": 7.730050128884614e-07
},
"TrainerController._save_models": {
"total": 0.13040970000292873,
"count": 1,
"self": 0.0015894020034465939,
"children": {
"RLTrainer._checkpoint": {
"total": 0.12882029799948214,
"count": 1,
"self": 0.12882029799948214
}
}
}
}
}
}
}