{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.443067193031311,
"min": 1.2800835371017456,
"max": 3.2957329750061035,
"count": 5000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 28538.09765625,
"min": 7155.673828125,
"max": 105463.453125,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 53.29347826086956,
"min": 40.01639344262295,
"max": 999.0,
"count": 5000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19612.0,
"min": 14144.0,
"max": 26316.0,
"count": 5000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1543.6959275457639,
"min": 1192.0038877279922,
"max": 1624.2412797764262,
"count": 4998
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 284040.05066842056,
"min": 2384.0077754559843,
"max": 384109.15090899856,
"count": 4998
},
"SoccerTwos.Step.mean": {
"value": 49999917.0,
"min": 9178.0,
"max": 49999917.0,
"count": 5000
},
"SoccerTwos.Step.sum": {
"value": 49999917.0,
"min": 9178.0,
"max": 49999917.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.026132797822356224,
"min": -0.15140753984451294,
"max": 0.19906628131866455,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -4.808434963226318,
"min": -23.524477005004883,
"max": 37.20640182495117,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.0272540096193552,
"min": -0.15328392386436462,
"max": 0.19725003838539124,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -5.014737606048584,
"min": -23.92275619506836,
"max": 37.541831970214844,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.021132607822832855,
"min": -0.75,
"max": 0.7430679280802889,
"count": 5000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 3.888399839401245,
"min": -70.8840001821518,
"max": 78.82120013237,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.021132607822832855,
"min": -0.75,
"max": 0.7430679280802889,
"count": 5000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 3.888399839401245,
"min": -70.8840001821518,
"max": 78.82120013237,
"count": 5000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.017004649222265775,
"min": 0.009633074485464022,
"max": 0.026020678194860616,
"count": 2426
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.017004649222265775,
"min": 0.009633074485464022,
"max": 0.026020678194860616,
"count": 2426
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1094802608092626,
"min": 0.0011860078220100453,
"max": 0.12590512832005818,
"count": 2426
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1094802608092626,
"min": 0.0011860078220100453,
"max": 0.12590512832005818,
"count": 2426
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.11046213234464328,
"min": 0.0012122190305187057,
"max": 0.12837031508485477,
"count": 2426
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.11046213234464328,
"min": 0.0012122190305187057,
"max": 0.12837031508485477,
"count": 2426
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 2426
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 2426
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000004,
"max": 0.20000000000000007,
"count": 2426
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000004,
"max": 0.20000000000000007,
"count": 2426
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 2426
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 2426
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675490816",
"python_version": "3.9.16 (main, Jan 11 2023, 16:05:54) \n[GCC 11.2.0]",
"command_line_arguments": "/home/dfm/anaconda3/envs/hf-rl-scr2s/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1675585492"
},
"total": 94676.13045854901,
"count": 1,
"self": 0.2701093499781564,
"children": {
"run_training.setup": {
"total": 0.008507276070304215,
"count": 1,
"self": 0.008507276070304215
},
"TrainerController.start_learning": {
"total": 94675.85184192297,
"count": 1,
"self": 50.37102641293313,
"children": {
"TrainerController._reset_env": {
"total": 9.127092732000165,
"count": 250,
"self": 9.127092732000165
},
"TrainerController.advance": {
"total": 94616.16810552415,
"count": 3454274,
"self": 54.619594351854175,
"children": {
"env_step": {
"total": 79265.20096260938,
"count": 3454274,
"self": 69325.05719639012,
"children": {
"SubprocessEnvManager._take_step": {
"total": 9909.401935333153,
"count": 3454274,
"self": 340.0472827787744,
"children": {
"TorchPolicy.evaluate": {
"total": 9569.354652554379,
"count": 6276130,
"self": 9569.354652554379
}
}
},
"workers": {
"total": 30.7418308861088,
"count": 3454274,
"self": 0.0,
"children": {
"worker_root": {
"total": 94551.04539222934,
"count": 3454274,
"is_parallel": true,
"self": 32818.115737056825,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.006051821052096784,
"count": 2,
"is_parallel": true,
"self": 0.001564615173265338,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.004487205878831446,
"count": 8,
"is_parallel": true,
"self": 0.004487205878831446
}
}
},
"UnityEnvironment.step": {
"total": 0.04235922091174871,
"count": 1,
"is_parallel": true,
"self": 0.0014879449736326933,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0010109150316566229,
"count": 1,
"is_parallel": true,
"self": 0.0010109150316566229
},
"communicator.exchange": {
"total": 0.03561850497499108,
"count": 1,
"is_parallel": true,
"self": 0.03561850497499108
},
"steps_from_proto": {
"total": 0.004241855931468308,
"count": 2,
"is_parallel": true,
"self": 0.0007875368464738131,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.003454319084994495,
"count": 8,
"is_parallel": true,
"self": 0.003454319084994495
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 61732.05164524284,
"count": 3454273,
"is_parallel": true,
"self": 3290.34492288786,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 1967.9171182445716,
"count": 3454273,
"is_parallel": true,
"self": 1967.9171182445716
},
"communicator.exchange": {
"total": 47325.6594340459,
"count": 3454273,
"is_parallel": true,
"self": 47325.6594340459
},
"steps_from_proto": {
"total": 9148.130170064513,
"count": 6908546,
"is_parallel": true,
"self": 1681.556728017982,
"children": {
"_process_rank_one_or_two_observation": {
"total": 7466.5734420465305,
"count": 27634184,
"is_parallel": true,
"self": 7466.5734420465305
}
}
}
}
},
"steps_from_proto": {
"total": 0.8780099296709523,
"count": 498,
"is_parallel": true,
"self": 0.1597570839803666,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.7182528456905857,
"count": 1992,
"is_parallel": true,
"self": 0.7182528456905857
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 15296.347548562917,
"count": 3454274,
"self": 422.38148571527563,
"children": {
"process_trajectory": {
"total": 6290.632544019143,
"count": 3454274,
"self": 6272.337288673385,
"children": {
"RLTrainer._checkpoint": {
"total": 18.29525534575805,
"count": 100,
"self": 18.29525534575805
}
}
},
"_update_policy": {
"total": 8583.333518828498,
"count": 2426,
"self": 5286.001546507468,
"children": {
"TorchPOCAOptimizer.update": {
"total": 3297.3319723210298,
"count": 72789,
"self": 3297.3319723210298
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.619398042559624e-07,
"count": 1,
"self": 9.619398042559624e-07
},
"TrainerController._save_models": {
"total": 0.18561629194300622,
"count": 1,
"self": 0.001208800938911736,
"children": {
"RLTrainer._checkpoint": {
"total": 0.18440749100409448,
"count": 1,
"self": 0.18440749100409448
}
}
}
}
}
}
}