{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.2797270715236664,
"min": 0.2797270715236664,
"max": 1.4494603872299194,
"count": 51
},
"Pyramids.Policy.Entropy.sum": {
"value": 8476.849609375,
"min": 8476.849609375,
"max": 43970.83203125,
"count": 51
},
"Pyramids.Step.mean": {
"value": 1529991.0,
"min": 29952.0,
"max": 1529991.0,
"count": 51
},
"Pyramids.Step.sum": {
"value": 1529991.0,
"min": 29952.0,
"max": 1529991.0,
"count": 51
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5398198962211609,
"min": -0.10549866408109665,
"max": 0.6009782552719116,
"count": 51
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 147.91064453125,
"min": -25.42517852783203,
"max": 166.47097778320312,
"count": 51
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.008302798494696617,
"min": -0.010379468090832233,
"max": 0.32014110684394836,
"count": 51
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 2.2749667167663574,
"min": -2.8647332191467285,
"max": 77.15400695800781,
"count": 51
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.07263698270253371,
"min": 0.06581168592120298,
"max": 0.07594973567711986,
"count": 51
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.016917757835472,
"min": 0.4935376666181509,
"max": 1.0818804220907623,
"count": 51
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01202843454839673,
"min": 0.0006601675238454783,
"max": 0.014907389456446669,
"count": 51
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.1683980836775542,
"min": 0.00792201028614574,
"max": 0.20990820526397633,
"count": 51
},
"Pyramids.Policy.LearningRate.mean": {
"value": 0.00014845887908515238,
"min": 0.00014845887908515238,
"max": 0.00029838354339596195,
"count": 51
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0020784243071921334,
"min": 0.0020784243071921334,
"max": 0.003927843190718967,
"count": 51
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.14948627619047622,
"min": 0.14948627619047622,
"max": 0.19946118095238097,
"count": 51
},
"Pyramids.Policy.Epsilon.sum": {
"value": 2.092807866666667,
"min": 1.3962282666666668,
"max": 2.7092810333333333,
"count": 51
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00495367899142857,
"min": 0.00495367899142857,
"max": 0.009946171977142856,
"count": 51
},
"Pyramids.Policy.Beta.sum": {
"value": 0.06935150587999998,
"min": 0.06935150587999998,
"max": 0.13093717523,
"count": 51
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.006247273180633783,
"min": 0.006247273180633783,
"max": 0.4162982106208801,
"count": 51
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.08746182173490524,
"min": 0.08746182173490524,
"max": 2.9140875339508057,
"count": 51
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 372.5609756097561,
"min": 332.6,
"max": 999.0,
"count": 51
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30550.0,
"min": 15984.0,
"max": 33950.0,
"count": 51
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.5294394857353635,
"min": -1.0000000521540642,
"max": 1.6004578200090362,
"count": 51
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 123.88459834456444,
"min": -29.87800160050392,
"max": 146.5601980611682,
"count": 51
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.5294394857353635,
"min": -1.0000000521540642,
"max": 1.6004578200090362,
"count": 51
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 123.88459834456444,
"min": -29.87800160050392,
"max": 146.5601980611682,
"count": 51
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.024554764987386325,
"min": 0.024554764987386325,
"max": 7.6601913664489985,
"count": 51
},
"Pyramids.Policy.RndReward.sum": {
"value": 1.9889359639782924,
"min": 1.978144668362802,
"max": 122.56306186318398,
"count": 51
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 51
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 51
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1724471923",
"python_version": "3.10.12 (main, Jul 29 2024, 16:56:48) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1724476963"
},
"total": 5040.241456817,
"count": 1,
"self": 0.48358017999908043,
"children": {
"run_training.setup": {
"total": 0.07504346499990788,
"count": 1,
"self": 0.07504346499990788
},
"TrainerController.start_learning": {
"total": 5039.682833172001,
"count": 1,
"self": 3.5302959500922952,
"children": {
"TrainerController._reset_env": {
"total": 3.3901487450002605,
"count": 1,
"self": 3.3901487450002605
},
"TrainerController.advance": {
"total": 5032.6101953259085,
"count": 98793,
"self": 3.9608414108543,
"children": {
"env_step": {
"total": 3403.325599734051,
"count": 98793,
"self": 3151.3776006142475,
"children": {
"SubprocessEnvManager._take_step": {
"total": 249.67941184999745,
"count": 98793,
"self": 11.0534170220958,
"children": {
"TorchPolicy.evaluate": {
"total": 238.62599482790165,
"count": 96710,
"self": 238.62599482790165
}
}
},
"workers": {
"total": 2.2685872698061758,
"count": 98792,
"self": 0.0,
"children": {
"worker_root": {
"total": 5028.501087116012,
"count": 98792,
"is_parallel": true,
"self": 2166.5202629661235,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0034656790003282367,
"count": 1,
"is_parallel": true,
"self": 0.0011825940000562696,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002283085000271967,
"count": 8,
"is_parallel": true,
"self": 0.002283085000271967
}
}
},
"UnityEnvironment.step": {
"total": 0.08306048299982649,
"count": 1,
"is_parallel": true,
"self": 0.0007720589997006755,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005785889998151106,
"count": 1,
"is_parallel": true,
"self": 0.0005785889998151106
},
"communicator.exchange": {
"total": 0.07980466400022124,
"count": 1,
"is_parallel": true,
"self": 0.07980466400022124
},
"steps_from_proto": {
"total": 0.0019051710000894673,
"count": 1,
"is_parallel": true,
"self": 0.00042546499980744557,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014797060002820217,
"count": 8,
"is_parallel": true,
"self": 0.0014797060002820217
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2861.9808241498886,
"count": 98791,
"is_parallel": true,
"self": 76.87792465067469,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 47.14168820803934,
"count": 98791,
"is_parallel": true,
"self": 47.14168820803934
},
"communicator.exchange": {
"total": 2534.8883740401966,
"count": 98791,
"is_parallel": true,
"self": 2534.8883740401966
},
"steps_from_proto": {
"total": 203.07283725097795,
"count": 98791,
"is_parallel": true,
"self": 44.120990863800216,
"children": {
"_process_rank_one_or_two_observation": {
"total": 158.95184638717774,
"count": 790328,
"is_parallel": true,
"self": 158.95184638717774
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1625.3237541810036,
"count": 98792,
"self": 7.321892404920618,
"children": {
"process_trajectory": {
"total": 253.45929564307653,
"count": 98792,
"self": 253.1046998920765,
"children": {
"RLTrainer._checkpoint": {
"total": 0.3545957510000335,
"count": 3,
"self": 0.3545957510000335
}
}
},
"_update_policy": {
"total": 1364.5425661330064,
"count": 709,
"self": 563.8000871289537,
"children": {
"TorchPPOOptimizer.update": {
"total": 800.7424790040527,
"count": 35220,
"self": 800.7424790040527
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.7239999579032883e-06,
"count": 1,
"self": 1.7239999579032883e-06
},
"TrainerController._save_models": {
"total": 0.1521914269997069,
"count": 1,
"self": 0.0028344880001895945,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1493569389995173,
"count": 1,
"self": 0.1493569389995173
}
}
}
}
}
}
}