{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.443067193031311,
"min": 1.2800835371017456,
"max": 3.2957329750061035,
"count": 5000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 28538.09765625,
"min": 7155.673828125,
"max": 105463.453125,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 53.29347826086956,
"min": 40.01639344262295,
"max": 999.0,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19612.0,
"min": 14144.0,
"max": 26316.0,
"count": 5000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1543.6959275457639,
"min": 1192.0038877279922,
"max": 1624.2412797764262,
"count": 4998
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 284040.05066842056,
"min": 2384.0077754559843,
"max": 384109.15090899856,
"count": 4998
},
"SoccerTwos.Step.mean": {
"value": 49999917.0,
"min": 9178.0,
"max": 49999917.0,
"count": 5000
},
"SoccerTwos.Step.sum": {
"value": 49999917.0,
"min": 9178.0,
"max": 49999917.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.026132797822356224,
"min": -0.15140753984451294,
"max": 0.19906628131866455,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.808434963226318,
"min": -23.524477005004883,
"max": 37.20640182495117,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.0272540096193552,
"min": -0.15328392386436462,
"max": 0.19725003838539124,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.014737606048584,
"min": -23.92275619506836,
"max": 37.541831970214844,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.021132607822832855,
"min": -0.75,
"max": 0.7430679280802889,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 3.888399839401245,
"min": -70.8840001821518,
"max": 78.82120013237,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.021132607822832855,
"min": -0.75,
"max": 0.7430679280802889,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 3.888399839401245,
"min": -70.8840001821518,
"max": 78.82120013237,
"count": 5000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.017004649222265775,
"min": 0.009633074485464022,
"max": 0.026020678194860616,
"count": 2426
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.017004649222265775,
"min": 0.009633074485464022,
"max": 0.026020678194860616,
"count": 2426
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1094802608092626,
"min": 0.0011860078220100453,
"max": 0.12590512832005818,
"count": 2426
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1094802608092626,
"min": 0.0011860078220100453,
"max": 0.12590512832005818,
"count": 2426
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.11046213234464328,
"min": 0.0012122190305187057,
"max": 0.12837031508485477,
"count": 2426
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.11046213234464328,
"min": 0.0012122190305187057,
"max": 0.12837031508485477,
"count": 2426
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 2426
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 2426
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000004,
"max": 0.20000000000000007,
"count": 2426
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000004,
"max": 0.20000000000000007,
"count": 2426
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 2426
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 2426
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675490816",
"python_version": "3.9.16 (main, Jan 11 2023, 16:05:54) \n[GCC 11.2.0]",
"command_line_arguments": "/home/dfm/anaconda3/envs/hf-rl-scr2s/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1675585492"
},
"total": 94676.13045854901,
"count": 1,
"self": 0.2701093499781564,
"children": {
"run_training.setup": {
"total": 0.008507276070304215,
"count": 1,
"self": 0.008507276070304215
},
"TrainerController.start_learning": {
"total": 94675.85184192297,
"count": 1,
"self": 50.37102641293313,
"children": {
"TrainerController._reset_env": {
"total": 9.127092732000165,
"count": 250,
"self": 9.127092732000165
},
"TrainerController.advance": {
"total": 94616.16810552415,
"count": 3454274,
"self": 54.619594351854175,
"children": {
"env_step": {
"total": 79265.20096260938,
"count": 3454274,
"self": 69325.05719639012,
"children": {
"SubprocessEnvManager._take_step": {
"total": 9909.401935333153,
"count": 3454274,
"self": 340.0472827787744,
"children": {
"TorchPolicy.evaluate": {
"total": 9569.354652554379,
"count": 6276130,
"self": 9569.354652554379
}
}
},
"workers": {
"total": 30.7418308861088,
"count": 3454274,
"self": 0.0,
"children": {
"worker_root": {
"total": 94551.04539222934,
"count": 3454274,
"is_parallel": true,
"self": 32818.115737056825,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.006051821052096784,
"count": 2,
"is_parallel": true,
"self": 0.001564615173265338,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.004487205878831446,
"count": 8,
"is_parallel": true,
"self": 0.004487205878831446
}
}
},
"UnityEnvironment.step": {
"total": 0.04235922091174871,
"count": 1,
"is_parallel": true,
"self": 0.0014879449736326933,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0010109150316566229,
"count": 1,
"is_parallel": true,
"self": 0.0010109150316566229
},
"communicator.exchange": {
"total": 0.03561850497499108,
"count": 1,
"is_parallel": true,
"self": 0.03561850497499108
},
"steps_from_proto": {
"total": 0.004241855931468308,
"count": 2,
"is_parallel": true,
"self": 0.0007875368464738131,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.003454319084994495,
"count": 8,
"is_parallel": true,
"self": 0.003454319084994495
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 61732.05164524284,
"count": 3454273,
"is_parallel": true,
"self": 3290.34492288786,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 1967.9171182445716,
"count": 3454273,
"is_parallel": true,
"self": 1967.9171182445716
},
"communicator.exchange": {
"total": 47325.6594340459,
"count": 3454273,
"is_parallel": true,
"self": 47325.6594340459
},
"steps_from_proto": {
"total": 9148.130170064513,
"count": 6908546,
"is_parallel": true,
"self": 1681.556728017982,
"children": {
"_process_rank_one_or_two_observation": {
"total": 7466.5734420465305,
"count": 27634184,
"is_parallel": true,
"self": 7466.5734420465305
}
}
}
}
},
"steps_from_proto": {
"total": 0.8780099296709523,
"count": 498,
"is_parallel": true,
"self": 0.1597570839803666,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.7182528456905857,
"count": 1992,
"is_parallel": true,
"self": 0.7182528456905857
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 15296.347548562917,
"count": 3454274,
"self": 422.38148571527563,
"children": {
"process_trajectory": {
"total": 6290.632544019143,
"count": 3454274,
"self": 6272.337288673385,
"children": {
"RLTrainer._checkpoint": {
"total": 18.29525534575805,
"count": 100,
"self": 18.29525534575805
}
}
},
"_update_policy": {
"total": 8583.333518828498,
"count": 2426,
"self": 5286.001546507468,
"children": {
"TorchPOCAOptimizer.update": {
"total": 3297.3319723210298,
"count": 72789,
"self": 3297.3319723210298
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.619398042559624e-07,
"count": 1,
"self": 9.619398042559624e-07
},
"TrainerController._save_models": {
"total": 0.18561629194300622,
"count": 1,
"self": 0.001208800938911736,
"children": {
"RLTrainer._checkpoint": {
"total": 0.18440749100409448,
"count": 1,
"self": 0.18440749100409448
}
}
}
}
}
}
}