{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.4088364839553833,
"min": 1.4088364839553833,
"max": 1.4288266897201538,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 69125.96875,
"min": 67958.75,
"max": 76833.765625,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 104.04201680672269,
"min": 90.85531135531136,
"max": 386.54615384615386,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 49524.0,
"min": 48960.0,
"max": 50251.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999991.0,
"min": 49992.0,
"max": 1999991.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999991.0,
"min": 49992.0,
"max": 1999991.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.3234262466430664,
"min": 0.0995357409119606,
"max": 2.3824241161346436,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1105.950927734375,
"min": 12.840110778808594,
"max": 1280.3931884765625,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 3.5949057628627585,
"min": 1.7076376492893972,
"max": 3.855494010402869,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 1711.175143122673,
"min": 220.28525675833225,
"max": 2030.16558521986,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 3.5949057628627585,
"min": 1.7076376492893972,
"max": 3.855494010402869,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 1711.175143122673,
"min": 220.28525675833225,
"max": 2030.16558521986,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.015666097746482893,
"min": 0.013582871308770136,
"max": 0.020586256164339525,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.031332195492965786,
"min": 0.028695884411475466,
"max": 0.0583777700647867,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.04581902561088403,
"min": 0.022617052764528328,
"max": 0.05437094138728247,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.09163805122176806,
"min": 0.04535687882453203,
"max": 0.16311282416184741,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 4.580873473075005e-06,
"min": 4.580873473075005e-06,
"max": 0.00029536792654402504,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 9.16174694615001e-06,
"min": 9.16174694615001e-06,
"max": 0.0008443270685576499,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10152692500000005,
"min": 0.10152692500000005,
"max": 0.198455975,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.2030538500000001,
"min": 0.2030538500000001,
"max": 0.5814423500000001,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 8.619355750000007e-05,
"min": 8.619355750000007e-05,
"max": 0.004922953152500001,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00017238711500000014,
"min": 0.00017238711500000014,
"max": 0.014073973265,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1670438494",
"python_version": "3.8.15 (default, Oct 12 2022, 19:14:39) \n[GCC 7.5.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1670440891"
},
"total": 2396.790107292,
"count": 1,
"self": 0.4002785859997857,
"children": {
"run_training.setup": {
"total": 0.11809149900000193,
"count": 1,
"self": 0.11809149900000193
},
"TrainerController.start_learning": {
"total": 2396.271737207,
"count": 1,
"self": 4.204856126065351,
"children": {
"TrainerController._reset_env": {
"total": 10.990058719999979,
"count": 1,
"self": 10.990058719999979
},
"TrainerController.advance": {
"total": 2380.950918312935,
"count": 231714,
"self": 4.4190581728953475,
"children": {
"env_step": {
"total": 1880.0255490469494,
"count": 231714,
"self": 1584.0090541549546,
"children": {
"SubprocessEnvManager._take_step": {
"total": 293.1739681670408,
"count": 231714,
"self": 15.361636591057504,
"children": {
"TorchPolicy.evaluate": {
"total": 277.8123315759833,
"count": 222892,
"self": 69.04148780897322,
"children": {
"TorchPolicy.sample_actions": {
"total": 208.7708437670101,
"count": 222892,
"self": 208.7708437670101
}
}
}
}
},
"workers": {
"total": 2.842526724954041,
"count": 231714,
"self": 0.0,
"children": {
"worker_root": {
"total": 2387.77907865304,
"count": 231714,
"is_parallel": true,
"self": 1085.4469075751028,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.007460876999857646,
"count": 1,
"is_parallel": true,
"self": 0.00036429299962037476,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0070965840002372715,
"count": 2,
"is_parallel": true,
"self": 0.0070965840002372715
}
}
},
"UnityEnvironment.step": {
"total": 0.02844806600000993,
"count": 1,
"is_parallel": true,
"self": 0.00028992000011385244,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0002044679999926302,
"count": 1,
"is_parallel": true,
"self": 0.0002044679999926302
},
"communicator.exchange": {
"total": 0.02716889899988928,
"count": 1,
"is_parallel": true,
"self": 0.02716889899988928
},
"steps_from_proto": {
"total": 0.0007847790000141686,
"count": 1,
"is_parallel": true,
"self": 0.0002602380000098492,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0005245410000043194,
"count": 2,
"is_parallel": true,
"self": 0.0005245410000043194
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1302.3321710779371,
"count": 231713,
"is_parallel": true,
"self": 36.381728045037335,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 79.04686686896844,
"count": 231713,
"is_parallel": true,
"self": 79.04686686896844
},
"communicator.exchange": {
"total": 1089.050730830006,
"count": 231713,
"is_parallel": true,
"self": 1089.050730830006
},
"steps_from_proto": {
"total": 97.85284533392542,
"count": 231713,
"is_parallel": true,
"self": 39.46468159792471,
"children": {
"_process_rank_one_or_two_observation": {
"total": 58.388163736000706,
"count": 463426,
"is_parallel": true,
"self": 58.388163736000706
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 496.5063110930903,
"count": 231714,
"self": 6.9528885611148326,
"children": {
"process_trajectory": {
"total": 158.79672647097505,
"count": 231714,
"self": 158.25347529297483,
"children": {
"RLTrainer._checkpoint": {
"total": 0.5432511780002187,
"count": 4,
"self": 0.5432511780002187
}
}
},
"_update_policy": {
"total": 330.7566960610004,
"count": 96,
"self": 275.6255118520005,
"children": {
"TorchPPOOptimizer.update": {
"total": 55.13118420899991,
"count": 2880,
"self": 55.13118420899991
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.950000847107731e-07,
"count": 1,
"self": 8.950000847107731e-07
},
"TrainerController._save_models": {
"total": 0.12590315299985377,
"count": 1,
"self": 0.00752140699978554,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11838174600006823,
"count": 1,
"self": 0.11838174600006823
}
}
}
}
}
}
}