Agent training resumed from the checkpoint.

d7f9729 verified about 2 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 1.9888639450073242,
	"min": 1.9591001272201538,
	"max": 2.304722309112549,
	"count": 200
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 38822.625,
	"min": 35067.953125,
	"max": 48122.08203125,
	"count": 200
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 64.16883116883118,
	"min": 39.94565217391305,
	"max": 90.54545454545455,
	"count": 200
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19764.0,
	"min": 14700.0,
	"max": 20632.0,
	"count": 200
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1502.0272002853012,
	"min": 1451.8758228042543,
	"max": 1541.9519563270803,
	"count": 200
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 231312.18884393637,
	"min": 167406.66922170314,
	"max": 357247.66199047794,
	"count": 200
	},
	"SoccerTwos.Step.mean": {
	"value": 4999977.0,
	"min": 3009994.0,
	"max": 4999977.0,
	"count": 200
	},
	"SoccerTwos.Step.sum": {
	"value": 4999977.0,
	"min": 3009994.0,
	"max": 4999977.0,
	"count": 200
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": -0.021213339641690254,
	"min": -0.11425300687551498,
	"max": 0.12809213995933533,
	"count": 200
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": -3.2668542861938477,
	"min": -19.80826759338379,
	"max": 27.155534744262695,
	"count": 200
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": -0.022786105051636696,
	"min": -0.1104229986667633,
	"max": 0.12847450375556946,
	"count": 200
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": -3.5090601444244385,
	"min": -19.60022735595703,
	"max": 27.236595153808594,
	"count": 200
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 200
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 200
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": -0.015171430714718707,
	"min": -0.2551724847662386,
	"max": 0.33634556886516037,
	"count": 200
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": -2.336400330066681,
	"min": -48.22759962081909,
	"max": 54.64479982852936,
	"count": 200
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": -0.015171430714718707,
	"min": -0.2551724847662386,
	"max": 0.33634556886516037,
	"count": 200
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": -2.336400330066681,
	"min": -48.22759962081909,
	"max": 54.64479982852936,
	"count": 200
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 200
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 200
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.020496907197715093,
	"min": 0.012350564827405227,
	"max": 0.02300609917535136,
	"count": 97
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.020496907197715093,
	"min": 0.012350564827405227,
	"max": 0.02300609917535136,
	"count": 97
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.09670711110035578,
	"min": 0.08462856933474541,
	"max": 0.12271908149123192,
	"count": 97
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.09670711110035578,
	"min": 0.08462856933474541,
	"max": 0.12271908149123192,
	"count": 97
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.0974824791153272,
	"min": 0.08525566384196281,
	"max": 0.12522547418872515,
	"count": 97
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.0974824791153272,
	"min": 0.08525566384196281,
	"max": 0.12522547418872515,
	"count": 97
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 97
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 97
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 97
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 97
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 97
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 97
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1734616699",
	"python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./results/SoccerTwosNew/configuration.yaml --env=train-soccer/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwosNew --no-graphics --resume",
	"mlagents_version": "1.2.0.dev0",
	"mlagents_envs_version": "1.2.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.5.1+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1734629293"
	},
	"total": 12593.300437904,
	"count": 1,
	"self": 0.6350089809984638,
	"children": {
	"run_training.setup": {
	"total": 0.07823170700009996,
	"count": 1,
	"self": 0.07823170700009996
	},
	"TrainerController.start_learning": {
	"total": 12592.587197216,
	"count": 1,
	"self": 5.7534607178804436,
	"children": {
	"TrainerController._reset_env": {
	"total": 3.5695269119986506,
	"count": 11,
	"self": 3.5695269119986506
	},
	"TrainerController.advance": {
	"total": 12583.022597385121,
	"count": 139672,
	"self": 6.206227469037913,
	"children": {
	"env_step": {
	"total": 4620.27174839786,
	"count": 139672,
	"self": 3706.6858647516747,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 910.3362662619375,
	"count": 139672,
	"self": 34.994419181102444,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 875.341847080835,
	"count": 250400,
	"self": 875.341847080835
	}
	}
	},
	"workers": {
	"total": 3.249617384248154,
	"count": 139672,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 12576.20785117312,
	"count": 139672,
	"is_parallel": true,
	"self": 9492.413004122256,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.005709210000077292,
	"count": 2,
	"is_parallel": true,
	"self": 0.001730845999873054,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0039783640002042375,
	"count": 8,
	"is_parallel": true,
	"self": 0.0039783640002042375
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.0770735109999805,
	"count": 1,
	"is_parallel": true,
	"self": 0.0014611759999070273,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0008674870000504598,
	"count": 1,
	"is_parallel": true,
	"self": 0.0008674870000504598
	},
	"communicator.exchange": {
	"total": 0.07061322499998823,
	"count": 1,
	"is_parallel": true,
	"self": 0.07061322499998823
	},
	"steps_from_proto": {
	"total": 0.004131623000034779,
	"count": 2,
	"is_parallel": true,
	"self": 0.0007733510001344257,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.003358271999900353,
	"count": 8,
	"is_parallel": true,
	"self": 0.003358271999900353
	}
	}
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.028956288003655573,
	"count": 20,
	"is_parallel": true,
	"self": 0.005744176001599044,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.02321211200205653,
	"count": 80,
	"is_parallel": true,
	"self": 0.02321211200205653
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 3083.7658907628593,
	"count": 139671,
	"is_parallel": true,
	"self": 189.4898898952615,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 120.75665496690567,
	"count": 139671,
	"is_parallel": true,
	"self": 120.75665496690567
	},
	"communicator.exchange": {
	"total": 2195.0722508216804,
	"count": 139671,
	"is_parallel": true,
	"self": 2195.0722508216804
	},
	"steps_from_proto": {
	"total": 578.4470950790119,
	"count": 279342,
	"is_parallel": true,
	"self": 102.38880514415735,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 476.05828993485454,
	"count": 1117368,
	"is_parallel": true,
	"self": 476.05828993485454
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 7956.544621518223,
	"count": 139672,
	"self": 40.048893840202254,
	"children": {
	"process_trajectory": {
	"total": 1281.5025450610324,
	"count": 139672,
	"self": 1280.6019619390331,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.9005831219992615,
	"count": 4,
	"self": 0.9005831219992615
	}
	}
	},
	"_update_policy": {
	"total": 6634.993182616989,
	"count": 97,
	"self": 412.37974876304725,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 6222.613433853941,
	"count": 2910,
	"self": 6222.613433853941
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 2.034001227002591e-06,
	"count": 1,
	"self": 2.034001227002591e-06
	},
	"TrainerController._save_models": {
	"total": 0.24161016699872562,
	"count": 1,
	"self": 0.004555736997644999,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.23705443000108062,
	"count": 1,
	"self": 0.23705443000108062
	}
	}
	}
	}
	}
	}
	}