Updated model (20 million timesteps)

b20082f verified 11 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 1.5748671293258667,
	"min": 1.4515712261199951,
	"max": 3.295767068862915,
	"count": 2018
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 29481.51171875,
	"min": 16642.564453125,
	"max": 128571.7890625,
	"count": 2018
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 68.33333333333333,
	"min": 42.75438596491228,
	"max": 999.0,
	"count": 2018
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19680.0,
	"min": 15472.0,
	"max": 25720.0,
	"count": 2018
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1813.964002222687,
	"min": 1194.0795318366788,
	"max": 1829.6692313391911,
	"count": 2004
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 261210.81632006692,
	"min": 2389.695889279391,
	"max": 395167.0134550716,
	"count": 2004
	},
	"SoccerTwos.Step.mean": {
	"value": 20179974.0,
	"min": 9880.0,
	"max": 20179974.0,
	"count": 2018
	},
	"SoccerTwos.Step.sum": {
	"value": 20179974.0,
	"min": 9880.0,
	"max": 20179974.0,
	"count": 2018
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": 0.03130420669913292,
	"min": -0.13359923660755157,
	"max": 0.2269599735736847,
	"count": 2018
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": 4.507805824279785,
	"min": -22.845468521118164,
	"max": 29.400775909423828,
	"count": 2018
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.03485170006752014,
	"min": -0.13703596591949463,
	"max": 0.22179201245307922,
	"count": 2018
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": 5.0186448097229,
	"min": -23.433151245117188,
	"max": 28.790231704711914,
	"count": 2018
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 2018
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 2018
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": 0.19314860925078392,
	"min": -0.7058823529411765,
	"max": 0.5010000041552952,
	"count": 2018
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": 27.813399732112885,
	"min": -58.92579996585846,
	"max": 57.05700010061264,
	"count": 2018
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": 0.19314860925078392,
	"min": -0.7058823529411765,
	"max": 0.5010000041552952,
	"count": 2018
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": 27.813399732112885,
	"min": -58.92579996585846,
	"max": 57.05700010061264,
	"count": 2018
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 2018
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 2018
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.014401823965211709,
	"min": 0.010948918039017978,
	"max": 0.026382342888973655,
	"count": 977
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.014401823965211709,
	"min": 0.010948918039017978,
	"max": 0.026382342888973655,
	"count": 977
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.10085869679848353,
	"min": 1.3623070056686023e-05,
	"max": 0.12364189525445303,
	"count": 977
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.10085869679848353,
	"min": 1.3623070056686023e-05,
	"max": 0.12364189525445303,
	"count": 977
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.10252247750759125,
	"min": 1.3706155762823376e-05,
	"max": 0.12558744698762894,
	"count": 977
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.10252247750759125,
	"min": 1.3706155762823376e-05,
	"max": 0.12558744698762894,
	"count": 977
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 977
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 977
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 977
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 977
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 977
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 977
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1709895743",
	"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
	"command_line_arguments": "/home/rodrigopc/.local/bin/mlagents-learn SoccerTwos_modified.yaml --env=/home/rodrigopc/ml-agents/training-envs-executables/linux/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos_updated --no-graphics",
	"mlagents_version": "1.1.0.dev0",
	"mlagents_envs_version": "1.1.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.2.1+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1709936999"
	},
	"total": 41256.521167513,
	"count": 1,
	"self": 0.18425126899091993,
	"children": {
	"run_training.setup": {
	"total": 0.05187036900042585,
	"count": 1,
	"self": 0.05187036900042585
	},
	"TrainerController.start_learning": {
	"total": 41256.285045875,
	"count": 1,
	"self": 27.919663638676866,
	"children": {
	"TrainerController._reset_env": {
	"total": 6.964602779999041,
	"count": 101,
	"self": 6.964602779999041
	},
	"TrainerController.advance": {
	"total": 41221.20336056933,
	"count": 1390338,
	"self": 30.872472584182105,
	"children": {
	"env_step": {
	"total": 32896.68286713089,
	"count": 1390338,
	"self": 27039.377430651955,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 5838.767032415787,
	"count": 1390338,
	"self": 219.904029299817,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 5618.86300311597,
	"count": 2539262,
	"self": 5618.86300311597
	}
	}
	},
	"workers": {
	"total": 18.538404063148846,
	"count": 1390337,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 41204.655873804215,
	"count": 1390337,
	"is_parallel": true,
	"self": 17584.037723834623,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0031043719991430407,
	"count": 2,
	"is_parallel": true,
	"self": 0.0005973539973638253,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0025070180017792154,
	"count": 8,
	"is_parallel": true,
	"self": 0.0025070180017792154
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.038335561999701895,
	"count": 1,
	"is_parallel": true,
	"self": 0.0013620440004160628,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0008860750003805151,
	"count": 1,
	"is_parallel": true,
	"self": 0.0008860750003805151
	},
	"communicator.exchange": {
	"total": 0.032491597999978694,
	"count": 1,
	"is_parallel": true,
	"self": 0.032491597999978694
	},
	"steps_from_proto": {
	"total": 0.003595844998926623,
	"count": 2,
	"is_parallel": true,
	"self": 0.0006351379970510607,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.002960707001875562,
	"count": 8,
	"is_parallel": true,
	"self": 0.002960707001875562
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 23620.29635493657,
	"count": 1390336,
	"is_parallel": true,
	"self": 1370.2611271664355,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 901.2657550687136,
	"count": 1390336,
	"is_parallel": true,
	"self": 901.2657550687136
	},
	"communicator.exchange": {
	"total": 17661.649300808305,
	"count": 1390336,
	"is_parallel": true,
	"self": 17661.649300808305
	},
	"steps_from_proto": {
	"total": 3687.1201718931143,
	"count": 2780672,
	"is_parallel": true,
	"self": 608.9816866305036,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 3078.1384852626106,
	"count": 11122688,
	"is_parallel": true,
	"self": 3078.1384852626106
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.3217950330226813,
	"count": 200,
	"is_parallel": true,
	"self": 0.052848733058453945,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.26894629996422736,
	"count": 800,
	"is_parallel": true,
	"self": 0.26894629996422736
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 8293.648020854256,
	"count": 1390337,
	"self": 218.09872197608638,
	"children": {
	"process_trajectory": {
	"total": 3157.0397781192096,
	"count": 1390337,
	"self": 3148.9236194042096,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 8.116158714999983,
	"count": 40,
	"self": 8.116158714999983
	}
	}
	},
	"_update_policy": {
	"total": 4918.50952075896,
	"count": 977,
	"self": 2116.926655908939,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 2801.582864850021,
	"count": 29310,
	"self": 2801.582864850021
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.8860009731724858e-06,
	"count": 1,
	"self": 1.8860009731724858e-06
	},
	"TrainerController._save_models": {
	"total": 0.19741700099984882,
	"count": 1,
	"self": 0.0027244980010436848,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.19469250299880514,
	"count": 1,
	"self": 0.19469250299880514
	}
	}
	}
	}
	}
	}
	}