diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -957,3 +957,1257 @@ main_loop: 1098.7486 [2024-10-20 17:41:33,710][00556] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480 [2024-10-20 17:41:33,712][00556] Avg episode reward: 5.480, avg true_objective: 4.480 [2024-10-20 17:41:53,429][00556] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-10-20 17:41:57,910][00556] The model has been pushed to https://huggingface.co/jerryvc/rl_course_vizdoom_health_gathering_supreme +[2024-10-20 17:42:35,660][00556] Environment doom_basic already registered, overwriting... +[2024-10-20 17:42:35,663][00556] Environment doom_two_colors_easy already registered, overwriting... +[2024-10-20 17:42:35,664][00556] Environment doom_two_colors_hard already registered, overwriting... +[2024-10-20 17:42:35,668][00556] Environment doom_dm already registered, overwriting... +[2024-10-20 17:42:35,670][00556] Environment doom_dwango5 already registered, overwriting... +[2024-10-20 17:42:35,671][00556] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2024-10-20 17:42:35,672][00556] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2024-10-20 17:42:35,673][00556] Environment doom_my_way_home already registered, overwriting... +[2024-10-20 17:42:35,675][00556] Environment doom_deadly_corridor already registered, overwriting... +[2024-10-20 17:42:35,676][00556] Environment doom_defend_the_center already registered, overwriting... +[2024-10-20 17:42:35,678][00556] Environment doom_defend_the_line already registered, overwriting... +[2024-10-20 17:42:35,680][00556] Environment doom_health_gathering already registered, overwriting... +[2024-10-20 17:42:35,681][00556] Environment doom_health_gathering_supreme already registered, overwriting... +[2024-10-20 17:42:35,683][00556] Environment doom_battle already registered, overwriting... +[2024-10-20 17:42:35,684][00556] Environment doom_battle2 already registered, overwriting... +[2024-10-20 17:42:35,685][00556] Environment doom_duel_bots already registered, overwriting... +[2024-10-20 17:42:35,686][00556] Environment doom_deathmatch_bots already registered, overwriting... +[2024-10-20 17:42:35,687][00556] Environment doom_duel already registered, overwriting... +[2024-10-20 17:42:35,688][00556] Environment doom_deathmatch_full already registered, overwriting... +[2024-10-20 17:42:35,689][00556] Environment doom_benchmark already registered, overwriting... +[2024-10-20 17:42:35,690][00556] register_encoder_factory: +[2024-10-20 17:42:35,716][00556] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-10-20 17:42:35,717][00556] Overriding arg 'train_for_env_steps' with value 8000000 passed from command line +[2024-10-20 17:42:35,724][00556] Experiment dir /content/train_dir/default_experiment already exists! +[2024-10-20 17:42:35,726][00556] Resuming existing experiment from /content/train_dir/default_experiment... +[2024-10-20 17:42:35,728][00556] Weights and Biases integration disabled +[2024-10-20 17:42:35,732][00556] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2024-10-20 17:42:37,855][00556] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=8000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2024-10-20 17:42:37,856][00556] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-10-20 17:42:37,861][00556] Rollout worker 0 uses device cpu +[2024-10-20 17:42:37,862][00556] Rollout worker 1 uses device cpu +[2024-10-20 17:42:37,864][00556] Rollout worker 2 uses device cpu +[2024-10-20 17:42:37,865][00556] Rollout worker 3 uses device cpu +[2024-10-20 17:42:37,867][00556] Rollout worker 4 uses device cpu +[2024-10-20 17:42:37,869][00556] Rollout worker 5 uses device cpu +[2024-10-20 17:42:37,870][00556] Rollout worker 6 uses device cpu +[2024-10-20 17:42:37,871][00556] Rollout worker 7 uses device cpu +[2024-10-20 17:42:37,944][00556] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-10-20 17:42:37,946][00556] InferenceWorker_p0-w0: min num requests: 2 +[2024-10-20 17:42:37,987][00556] Starting all processes... +[2024-10-20 17:42:37,990][00556] Starting process learner_proc0 +[2024-10-20 17:42:38,037][00556] Starting all processes... +[2024-10-20 17:42:38,044][00556] Starting process inference_proc0-0 +[2024-10-20 17:42:38,044][00556] Starting process rollout_proc0 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc1 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc2 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc3 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc4 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc5 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc6 +[2024-10-20 17:42:38,046][00556] Starting process rollout_proc7 +[2024-10-20 17:42:53,558][13111] Worker 3 uses CPU cores [1] +[2024-10-20 17:42:53,579][13094] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-10-20 17:42:53,580][13094] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-10-20 17:42:53,604][13107] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-10-20 17:42:53,606][13107] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-10-20 17:42:53,638][13094] Num visible devices: 1 +[2024-10-20 17:42:53,683][13094] Starting seed is not provided +[2024-10-20 17:42:53,685][13094] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-10-20 17:42:53,686][13094] Initializing actor-critic model on device cuda:0 +[2024-10-20 17:42:53,687][13094] RunningMeanStd input shape: (3, 72, 128) +[2024-10-20 17:42:53,688][13094] RunningMeanStd input shape: (1,) +[2024-10-20 17:42:53,713][13107] Num visible devices: 1 +[2024-10-20 17:42:53,758][13094] ConvEncoder: input_channels=3 +[2024-10-20 17:42:53,874][13112] Worker 5 uses CPU cores [1] +[2024-10-20 17:42:53,974][13109] Worker 1 uses CPU cores [1] +[2024-10-20 17:42:54,079][13115] Worker 7 uses CPU cores [1] +[2024-10-20 17:42:54,080][13113] Worker 4 uses CPU cores [0] +[2024-10-20 17:42:54,130][13110] Worker 2 uses CPU cores [0] +[2024-10-20 17:42:54,201][13108] Worker 0 uses CPU cores [0] +[2024-10-20 17:42:54,211][13094] Conv encoder output size: 512 +[2024-10-20 17:42:54,213][13094] Policy head output size: 512 +[2024-10-20 17:42:54,222][13114] Worker 6 uses CPU cores [0] +[2024-10-20 17:42:54,234][13094] Created Actor Critic model with architecture: +[2024-10-20 17:42:54,234][13094] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-10-20 17:42:54,359][13094] Using optimizer +[2024-10-20 17:42:54,991][13094] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-10-20 17:42:55,028][13094] Loading model from checkpoint +[2024-10-20 17:42:55,030][13094] Loaded experiment state at self.train_step=978, self.env_steps=4005888 +[2024-10-20 17:42:55,031][13094] Initialized policy 0 weights for model version 978 +[2024-10-20 17:42:55,035][13094] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-10-20 17:42:55,043][13094] LearnerWorker_p0 finished initialization! +[2024-10-20 17:42:55,129][13107] RunningMeanStd input shape: (3, 72, 128) +[2024-10-20 17:42:55,131][13107] RunningMeanStd input shape: (1,) +[2024-10-20 17:42:55,143][13107] ConvEncoder: input_channels=3 +[2024-10-20 17:42:55,247][13107] Conv encoder output size: 512 +[2024-10-20 17:42:55,248][13107] Policy head output size: 512 +[2024-10-20 17:42:55,301][00556] Inference worker 0-0 is ready! +[2024-10-20 17:42:55,302][00556] All inference workers are ready! Signal rollout workers to start! +[2024-10-20 17:42:55,505][13108] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,506][13113] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,509][13110] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,510][13114] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,515][13112] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,515][13111] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,512][13115] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,525][13109] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-10-20 17:42:55,732][00556] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4005888. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-20 17:42:56,491][13112] Decorrelating experience for 0 frames... +[2024-10-20 17:42:56,498][13109] Decorrelating experience for 0 frames... +[2024-10-20 17:42:56,881][13114] Decorrelating experience for 0 frames... +[2024-10-20 17:42:56,901][13110] Decorrelating experience for 0 frames... +[2024-10-20 17:42:56,909][13113] Decorrelating experience for 0 frames... +[2024-10-20 17:42:57,471][13109] Decorrelating experience for 32 frames... +[2024-10-20 17:42:57,936][00556] Heartbeat connected on Batcher_0 +[2024-10-20 17:42:57,944][00556] Heartbeat connected on LearnerWorker_p0 +[2024-10-20 17:42:57,998][00556] Heartbeat connected on InferenceWorker_p0-w0 +[2024-10-20 17:42:58,332][13114] Decorrelating experience for 32 frames... +[2024-10-20 17:42:58,348][13110] Decorrelating experience for 32 frames... +[2024-10-20 17:42:58,399][13108] Decorrelating experience for 0 frames... +[2024-10-20 17:42:58,830][13112] Decorrelating experience for 32 frames... +[2024-10-20 17:42:58,841][13115] Decorrelating experience for 0 frames... +[2024-10-20 17:42:59,630][13109] Decorrelating experience for 64 frames... +[2024-10-20 17:43:00,297][13113] Decorrelating experience for 32 frames... +[2024-10-20 17:43:00,332][13108] Decorrelating experience for 32 frames... +[2024-10-20 17:43:00,408][13111] Decorrelating experience for 0 frames... +[2024-10-20 17:43:00,732][00556] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-20 17:43:00,807][13114] Decorrelating experience for 64 frames... +[2024-10-20 17:43:00,811][13110] Decorrelating experience for 64 frames... +[2024-10-20 17:43:01,381][13109] Decorrelating experience for 96 frames... +[2024-10-20 17:43:01,623][00556] Heartbeat connected on RolloutWorker_w1 +[2024-10-20 17:43:01,840][13114] Decorrelating experience for 96 frames... +[2024-10-20 17:43:02,372][13115] Decorrelating experience for 32 frames... +[2024-10-20 17:43:02,396][00556] Heartbeat connected on RolloutWorker_w6 +[2024-10-20 17:43:03,033][13112] Decorrelating experience for 64 frames... +[2024-10-20 17:43:03,525][13108] Decorrelating experience for 64 frames... +[2024-10-20 17:43:04,677][13115] Decorrelating experience for 64 frames... +[2024-10-20 17:43:04,830][13110] Decorrelating experience for 96 frames... +[2024-10-20 17:43:05,205][00556] Heartbeat connected on RolloutWorker_w2 +[2024-10-20 17:43:05,295][13112] Decorrelating experience for 96 frames... +[2024-10-20 17:43:05,425][13113] Decorrelating experience for 64 frames... +[2024-10-20 17:43:05,634][13108] Decorrelating experience for 96 frames... +[2024-10-20 17:43:05,732][00556] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 80.8. Samples: 808. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-10-20 17:43:05,735][00556] Avg episode reward: [(0, '3.382')] +[2024-10-20 17:43:05,893][00556] Heartbeat connected on RolloutWorker_w5 +[2024-10-20 17:43:06,179][00556] Heartbeat connected on RolloutWorker_w0 +[2024-10-20 17:43:08,107][13094] Signal inference workers to stop experience collection... +[2024-10-20 17:43:08,114][13107] InferenceWorker_p0-w0: stopping experience collection +[2024-10-20 17:43:08,388][13113] Decorrelating experience for 96 frames... +[2024-10-20 17:43:08,426][13111] Decorrelating experience for 32 frames... +[2024-10-20 17:43:08,491][13115] Decorrelating experience for 96 frames... +[2024-10-20 17:43:08,573][00556] Heartbeat connected on RolloutWorker_w4 +[2024-10-20 17:43:08,667][00556] Heartbeat connected on RolloutWorker_w7 +[2024-10-20 17:43:09,051][13111] Decorrelating experience for 64 frames... +[2024-10-20 17:43:09,471][13111] Decorrelating experience for 96 frames... +[2024-10-20 17:43:09,551][00556] Heartbeat connected on RolloutWorker_w3 +[2024-10-20 17:43:09,890][13094] Signal inference workers to resume experience collection... +[2024-10-20 17:43:09,890][13107] InferenceWorker_p0-w0: resuming experience collection +[2024-10-20 17:43:10,732][00556] Fps is (10 sec: 409.6, 60 sec: 273.1, 300 sec: 273.1). Total num frames: 4009984. Throughput: 0: 180.8. Samples: 2712. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-10-20 17:43:10,738][00556] Avg episode reward: [(0, '3.453')] +[2024-10-20 17:43:15,732][00556] Fps is (10 sec: 2457.5, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 4030464. Throughput: 0: 277.4. Samples: 5548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:43:15,737][00556] Avg episode reward: [(0, '3.936')] +[2024-10-20 17:43:20,732][00556] Fps is (10 sec: 3276.8, 60 sec: 1474.6, 300 sec: 1474.6). Total num frames: 4042752. Throughput: 0: 373.3. Samples: 9332. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:43:20,735][00556] Avg episode reward: [(0, '4.227')] +[2024-10-20 17:43:21,421][13107] Updated weights for policy 0, policy_version 988 (0.0032) +[2024-10-20 17:43:25,732][00556] Fps is (10 sec: 3276.9, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 4063232. Throughput: 0: 507.3. Samples: 15218. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:43:25,735][00556] Avg episode reward: [(0, '4.741')] +[2024-10-20 17:43:30,555][13107] Updated weights for policy 0, policy_version 998 (0.0033) +[2024-10-20 17:43:30,732][00556] Fps is (10 sec: 4505.7, 60 sec: 2340.6, 300 sec: 2340.6). Total num frames: 4087808. Throughput: 0: 530.6. Samples: 18570. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:43:30,738][00556] Avg episode reward: [(0, '4.732')] +[2024-10-20 17:43:35,732][00556] Fps is (10 sec: 3686.4, 60 sec: 2355.2, 300 sec: 2355.2). Total num frames: 4100096. Throughput: 0: 586.8. Samples: 23472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:43:35,735][00556] Avg episode reward: [(0, '4.839')] +[2024-10-20 17:43:40,732][00556] Fps is (10 sec: 3276.8, 60 sec: 2548.6, 300 sec: 2548.6). Total num frames: 4120576. Throughput: 0: 640.1. Samples: 28804. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:43:40,735][00556] Avg episode reward: [(0, '4.527')] +[2024-10-20 17:43:42,520][13107] Updated weights for policy 0, policy_version 1008 (0.0023) +[2024-10-20 17:43:45,732][00556] Fps is (10 sec: 4096.0, 60 sec: 2703.4, 300 sec: 2703.4). Total num frames: 4141056. Throughput: 0: 715.9. Samples: 32214. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:43:45,734][00556] Avg episode reward: [(0, '4.514')] +[2024-10-20 17:43:50,732][00556] Fps is (10 sec: 3686.4, 60 sec: 2755.5, 300 sec: 2755.5). Total num frames: 4157440. Throughput: 0: 834.2. Samples: 38346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:43:50,734][00556] Avg episode reward: [(0, '4.586')] +[2024-10-20 17:43:54,457][13107] Updated weights for policy 0, policy_version 1018 (0.0019) +[2024-10-20 17:43:55,732][00556] Fps is (10 sec: 3276.8, 60 sec: 2798.9, 300 sec: 2798.9). Total num frames: 4173824. Throughput: 0: 881.2. Samples: 42366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:43:55,735][00556] Avg episode reward: [(0, '4.691')] +[2024-10-20 17:44:00,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3140.3, 300 sec: 2898.7). Total num frames: 4194304. Throughput: 0: 891.9. Samples: 45682. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:44:00,734][00556] Avg episode reward: [(0, '4.854')] +[2024-10-20 17:44:03,774][13107] Updated weights for policy 0, policy_version 1028 (0.0029) +[2024-10-20 17:44:05,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3042.7). Total num frames: 4218880. Throughput: 0: 960.8. Samples: 52566. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:44:05,738][00556] Avg episode reward: [(0, '4.895')] +[2024-10-20 17:44:10,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3003.7). Total num frames: 4231168. Throughput: 0: 927.4. Samples: 56952. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-10-20 17:44:10,736][00556] Avg episode reward: [(0, '4.792')] +[2024-10-20 17:44:15,411][13107] Updated weights for policy 0, policy_version 1038 (0.0043) +[2024-10-20 17:44:15,733][00556] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3072.0). Total num frames: 4251648. Throughput: 0: 912.4. Samples: 59630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:44:15,734][00556] Avg episode reward: [(0, '4.549')] +[2024-10-20 17:44:20,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3132.2). Total num frames: 4272128. Throughput: 0: 955.3. Samples: 66460. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:44:20,736][00556] Avg episode reward: [(0, '4.646')] +[2024-10-20 17:44:25,732][00556] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3140.3). Total num frames: 4288512. Throughput: 0: 949.8. Samples: 71544. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:44:25,734][00556] Avg episode reward: [(0, '4.754')] +[2024-10-20 17:44:26,186][13107] Updated weights for policy 0, policy_version 1048 (0.0021) +[2024-10-20 17:44:30,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3147.5). Total num frames: 4304896. Throughput: 0: 919.3. Samples: 73584. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:44:30,734][00556] Avg episode reward: [(0, '4.854')] +[2024-10-20 17:44:35,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3235.8). Total num frames: 4329472. Throughput: 0: 929.7. Samples: 80184. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:44:35,736][00556] Avg episode reward: [(0, '5.021')] +[2024-10-20 17:44:35,747][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001057_4329472.pth... +[2024-10-20 17:44:35,875][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000955_3911680.pth +[2024-10-20 17:44:36,413][13107] Updated weights for policy 0, policy_version 1058 (0.0021) +[2024-10-20 17:44:40,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3276.8). Total num frames: 4349952. Throughput: 0: 979.6. Samples: 86446. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:44:40,734][00556] Avg episode reward: [(0, '4.849')] +[2024-10-20 17:44:45,732][00556] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3239.6). Total num frames: 4362240. Throughput: 0: 953.0. Samples: 88566. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-10-20 17:44:45,736][00556] Avg episode reward: [(0, '4.541')] +[2024-10-20 17:44:47,999][13107] Updated weights for policy 0, policy_version 1068 (0.0020) +[2024-10-20 17:44:50,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3312.4). Total num frames: 4386816. Throughput: 0: 925.2. Samples: 94198. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:44:50,735][00556] Avg episode reward: [(0, '4.307')] +[2024-10-20 17:44:55,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3345.1). Total num frames: 4407296. Throughput: 0: 976.7. Samples: 100902. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:44:55,739][00556] Avg episode reward: [(0, '4.596')] +[2024-10-20 17:44:57,449][13107] Updated weights for policy 0, policy_version 1078 (0.0027) +[2024-10-20 17:45:00,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3342.3). Total num frames: 4423680. Throughput: 0: 973.1. Samples: 103420. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:45:00,738][00556] Avg episode reward: [(0, '4.720')] +[2024-10-20 17:45:05,735][00556] Fps is (10 sec: 3275.8, 60 sec: 3686.2, 300 sec: 3339.7). Total num frames: 4440064. Throughput: 0: 919.6. Samples: 107846. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:45:05,743][00556] Avg episode reward: [(0, '4.712')] +[2024-10-20 17:45:09,072][13107] Updated weights for policy 0, policy_version 1088 (0.0018) +[2024-10-20 17:45:10,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3398.2). Total num frames: 4464640. Throughput: 0: 959.4. Samples: 114716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:45:10,735][00556] Avg episode reward: [(0, '4.698')] +[2024-10-20 17:45:15,732][00556] Fps is (10 sec: 4097.3, 60 sec: 3823.0, 300 sec: 3393.8). Total num frames: 4481024. Throughput: 0: 989.5. Samples: 118110. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:45:15,735][00556] Avg episode reward: [(0, '4.636')] +[2024-10-20 17:45:20,732][00556] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3361.5). Total num frames: 4493312. Throughput: 0: 936.5. Samples: 122328. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:45:20,734][00556] Avg episode reward: [(0, '4.718')] +[2024-10-20 17:45:20,899][13107] Updated weights for policy 0, policy_version 1098 (0.0021) +[2024-10-20 17:45:25,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3413.3). Total num frames: 4517888. Throughput: 0: 933.8. Samples: 128466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:45:25,734][00556] Avg episode reward: [(0, '4.790')] +[2024-10-20 17:45:29,847][13107] Updated weights for policy 0, policy_version 1108 (0.0025) +[2024-10-20 17:45:30,733][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3435.4). Total num frames: 4538368. Throughput: 0: 962.1. Samples: 131862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:45:30,736][00556] Avg episode reward: [(0, '4.628')] +[2024-10-20 17:45:35,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3430.4). Total num frames: 4554752. Throughput: 0: 953.4. Samples: 137100. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:45:35,736][00556] Avg episode reward: [(0, '4.701')] +[2024-10-20 17:45:40,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3425.7). Total num frames: 4571136. Throughput: 0: 922.9. Samples: 142434. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:45:40,739][00556] Avg episode reward: [(0, '4.865')] +[2024-10-20 17:45:41,702][13107] Updated weights for policy 0, policy_version 1118 (0.0027) +[2024-10-20 17:45:45,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3469.6). Total num frames: 4595712. Throughput: 0: 944.1. Samples: 145906. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:45:45,739][00556] Avg episode reward: [(0, '4.650')] +[2024-10-20 17:45:50,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3464.0). Total num frames: 4612096. Throughput: 0: 981.3. Samples: 152002. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:45:50,735][00556] Avg episode reward: [(0, '4.598')] +[2024-10-20 17:45:52,339][13107] Updated weights for policy 0, policy_version 1128 (0.0023) +[2024-10-20 17:45:55,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3458.8). Total num frames: 4628480. Throughput: 0: 923.1. Samples: 156256. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:45:55,736][00556] Avg episode reward: [(0, '4.767')] +[2024-10-20 17:46:00,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3476.1). Total num frames: 4648960. Throughput: 0: 923.0. Samples: 159644. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:46:00,736][00556] Avg episode reward: [(0, '4.646')] +[2024-10-20 17:46:02,548][13107] Updated weights for policy 0, policy_version 1138 (0.0032) +[2024-10-20 17:46:05,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3891.4, 300 sec: 3513.9). Total num frames: 4673536. Throughput: 0: 982.5. Samples: 166542. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:46:05,735][00556] Avg episode reward: [(0, '4.671')] +[2024-10-20 17:46:10,740][00556] Fps is (10 sec: 3683.6, 60 sec: 3685.9, 300 sec: 3486.7). Total num frames: 4685824. Throughput: 0: 940.8. Samples: 170808. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:46:10,744][00556] Avg episode reward: [(0, '4.696')] +[2024-10-20 17:46:14,288][13107] Updated weights for policy 0, policy_version 1148 (0.0036) +[2024-10-20 17:46:15,733][00556] Fps is (10 sec: 3276.4, 60 sec: 3754.6, 300 sec: 3502.1). Total num frames: 4706304. Throughput: 0: 926.4. Samples: 173550. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:46:15,736][00556] Avg episode reward: [(0, '4.726')] +[2024-10-20 17:46:20,732][00556] Fps is (10 sec: 4509.1, 60 sec: 3959.5, 300 sec: 3536.5). Total num frames: 4730880. Throughput: 0: 963.6. Samples: 180462. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:46:20,742][00556] Avg episode reward: [(0, '4.856')] +[2024-10-20 17:46:23,949][13107] Updated weights for policy 0, policy_version 1158 (0.0013) +[2024-10-20 17:46:25,732][00556] Fps is (10 sec: 4096.4, 60 sec: 3822.9, 300 sec: 3530.4). Total num frames: 4747264. Throughput: 0: 962.3. Samples: 185738. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:46:25,735][00556] Avg episode reward: [(0, '4.657')] +[2024-10-20 17:46:30,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3524.5). Total num frames: 4763648. Throughput: 0: 932.1. Samples: 187850. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:46:30,737][00556] Avg episode reward: [(0, '4.798')] +[2024-10-20 17:46:34,895][13107] Updated weights for policy 0, policy_version 1168 (0.0032) +[2024-10-20 17:46:35,732][00556] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3537.5). Total num frames: 4784128. Throughput: 0: 944.4. Samples: 194498. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:46:35,737][00556] Avg episode reward: [(0, '4.734')] +[2024-10-20 17:46:35,763][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001169_4788224.pth... +[2024-10-20 17:46:35,889][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth +[2024-10-20 17:46:40,734][00556] Fps is (10 sec: 4095.3, 60 sec: 3891.1, 300 sec: 3549.8). Total num frames: 4804608. Throughput: 0: 990.0. Samples: 200808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:46:40,739][00556] Avg episode reward: [(0, '4.885')] +[2024-10-20 17:46:45,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3543.9). Total num frames: 4820992. Throughput: 0: 959.0. Samples: 202798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:46:45,735][00556] Avg episode reward: [(0, '4.974')] +[2024-10-20 17:46:46,922][13107] Updated weights for policy 0, policy_version 1178 (0.0020) +[2024-10-20 17:46:50,732][00556] Fps is (10 sec: 3686.9, 60 sec: 3822.9, 300 sec: 3555.7). Total num frames: 4841472. Throughput: 0: 926.3. Samples: 208226. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:46:50,738][00556] Avg episode reward: [(0, '4.893')] +[2024-10-20 17:46:55,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3566.9). Total num frames: 4861952. Throughput: 0: 981.9. Samples: 214988. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:46:55,735][00556] Avg episode reward: [(0, '4.458')] +[2024-10-20 17:46:55,834][13107] Updated weights for policy 0, policy_version 1188 (0.0027) +[2024-10-20 17:47:00,738][00556] Fps is (10 sec: 3684.5, 60 sec: 3822.6, 300 sec: 3560.9). Total num frames: 4878336. Throughput: 0: 976.0. Samples: 217476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:47:00,749][00556] Avg episode reward: [(0, '4.403')] +[2024-10-20 17:47:05,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3555.3). Total num frames: 4894720. Throughput: 0: 922.5. Samples: 221974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:47:05,738][00556] Avg episode reward: [(0, '4.568')] +[2024-10-20 17:47:07,619][13107] Updated weights for policy 0, policy_version 1198 (0.0021) +[2024-10-20 17:47:10,732][00556] Fps is (10 sec: 4098.3, 60 sec: 3891.7, 300 sec: 3582.0). Total num frames: 4919296. Throughput: 0: 958.2. Samples: 228856. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:47:10,735][00556] Avg episode reward: [(0, '4.674')] +[2024-10-20 17:47:15,732][00556] Fps is (10 sec: 4095.9, 60 sec: 3823.0, 300 sec: 3576.1). Total num frames: 4935680. Throughput: 0: 986.0. Samples: 232220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:47:15,739][00556] Avg episode reward: [(0, '4.703')] +[2024-10-20 17:47:18,853][13107] Updated weights for policy 0, policy_version 1208 (0.0025) +[2024-10-20 17:47:20,732][00556] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3570.5). Total num frames: 4952064. Throughput: 0: 931.6. Samples: 236418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:47:20,735][00556] Avg episode reward: [(0, '4.860')] +[2024-10-20 17:47:25,732][00556] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3580.2). Total num frames: 4972544. Throughput: 0: 930.3. Samples: 242668. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:47:25,735][00556] Avg episode reward: [(0, '4.959')] +[2024-10-20 17:47:28,508][13107] Updated weights for policy 0, policy_version 1218 (0.0035) +[2024-10-20 17:47:30,732][00556] Fps is (10 sec: 4505.7, 60 sec: 3891.2, 300 sec: 3604.5). Total num frames: 4997120. Throughput: 0: 963.4. Samples: 246152. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:47:30,735][00556] Avg episode reward: [(0, '4.739')] +[2024-10-20 17:47:35,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3584.0). Total num frames: 5009408. Throughput: 0: 956.1. Samples: 251250. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:47:35,735][00556] Avg episode reward: [(0, '4.733')] +[2024-10-20 17:47:39,986][13107] Updated weights for policy 0, policy_version 1228 (0.0021) +[2024-10-20 17:47:40,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3593.0). Total num frames: 5029888. Throughput: 0: 929.8. Samples: 256830. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:47:40,734][00556] Avg episode reward: [(0, '4.928')] +[2024-10-20 17:47:45,732][00556] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3615.8). Total num frames: 5054464. Throughput: 0: 952.8. Samples: 260348. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:47:45,735][00556] Avg episode reward: [(0, '4.942')] +[2024-10-20 17:47:49,781][13107] Updated weights for policy 0, policy_version 1238 (0.0016) +[2024-10-20 17:47:50,734][00556] Fps is (10 sec: 4095.4, 60 sec: 3822.9, 300 sec: 3610.0). Total num frames: 5070848. Throughput: 0: 991.1. Samples: 266574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:47:50,736][00556] Avg episode reward: [(0, '4.986')] +[2024-10-20 17:47:55,732][00556] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3665.6). Total num frames: 5087232. Throughput: 0: 936.4. Samples: 270996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:47:55,735][00556] Avg episode reward: [(0, '4.664')] +[2024-10-20 17:48:00,480][13107] Updated weights for policy 0, policy_version 1248 (0.0018) +[2024-10-20 17:48:00,732][00556] Fps is (10 sec: 4096.6, 60 sec: 3891.6, 300 sec: 3748.9). Total num frames: 5111808. Throughput: 0: 939.4. Samples: 274492. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:48:00,735][00556] Avg episode reward: [(0, '4.776')] +[2024-10-20 17:48:05,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 5132288. Throughput: 0: 1007.4. Samples: 281752. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:48:05,741][00556] Avg episode reward: [(0, '5.045')] +[2024-10-20 17:48:10,740][00556] Fps is (10 sec: 3683.3, 60 sec: 3822.4, 300 sec: 3790.4). Total num frames: 5148672. Throughput: 0: 970.0. Samples: 286328. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-10-20 17:48:10,743][00556] Avg episode reward: [(0, '4.957')] +[2024-10-20 17:48:11,795][13107] Updated weights for policy 0, policy_version 1258 (0.0038) +[2024-10-20 17:48:15,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 5169152. Throughput: 0: 960.3. Samples: 289364. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:48:15,735][00556] Avg episode reward: [(0, '5.179')] +[2024-10-20 17:48:20,164][13107] Updated weights for policy 0, policy_version 1268 (0.0036) +[2024-10-20 17:48:20,732][00556] Fps is (10 sec: 4509.4, 60 sec: 4027.8, 300 sec: 3832.2). Total num frames: 5193728. Throughput: 0: 1008.7. Samples: 296640. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:48:20,737][00556] Avg episode reward: [(0, '4.946')] +[2024-10-20 17:48:25,734][00556] Fps is (10 sec: 4095.2, 60 sec: 3959.3, 300 sec: 3804.4). Total num frames: 5210112. Throughput: 0: 1002.7. Samples: 301954. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:48:25,738][00556] Avg episode reward: [(0, '4.618')] +[2024-10-20 17:48:30,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 5230592. Throughput: 0: 972.6. Samples: 304114. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:48:30,734][00556] Avg episode reward: [(0, '4.662')] +[2024-10-20 17:48:31,541][13107] Updated weights for policy 0, policy_version 1278 (0.0025) +[2024-10-20 17:48:35,732][00556] Fps is (10 sec: 4096.8, 60 sec: 4027.7, 300 sec: 3832.2). Total num frames: 5251072. Throughput: 0: 985.9. Samples: 310940. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:48:35,736][00556] Avg episode reward: [(0, '4.775')] +[2024-10-20 17:48:35,746][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001282_5251072.pth... +[2024-10-20 17:48:35,875][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001057_4329472.pth +[2024-10-20 17:48:40,733][00556] Fps is (10 sec: 4095.5, 60 sec: 4027.6, 300 sec: 3832.2). Total num frames: 5271552. Throughput: 0: 1030.5. Samples: 317370. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:48:40,736][00556] Avg episode reward: [(0, '4.752')] +[2024-10-20 17:48:41,090][13107] Updated weights for policy 0, policy_version 1288 (0.0025) +[2024-10-20 17:48:45,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 5287936. Throughput: 0: 999.7. Samples: 319480. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:48:45,735][00556] Avg episode reward: [(0, '4.800')] +[2024-10-20 17:48:50,732][00556] Fps is (10 sec: 3686.9, 60 sec: 3959.6, 300 sec: 3846.1). Total num frames: 5308416. Throughput: 0: 969.8. Samples: 325392. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:48:50,734][00556] Avg episode reward: [(0, '5.022')] +[2024-10-20 17:48:51,864][13107] Updated weights for policy 0, policy_version 1298 (0.0022) +[2024-10-20 17:48:55,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3860.0). Total num frames: 5332992. Throughput: 0: 1024.9. Samples: 332440. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:48:55,735][00556] Avg episode reward: [(0, '4.784')] +[2024-10-20 17:49:00,733][00556] Fps is (10 sec: 4095.5, 60 sec: 3959.4, 300 sec: 3832.2). Total num frames: 5349376. Throughput: 0: 1008.9. Samples: 334768. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:49:00,739][00556] Avg episode reward: [(0, '4.588')] +[2024-10-20 17:49:03,274][13107] Updated weights for policy 0, policy_version 1308 (0.0053) +[2024-10-20 17:49:05,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5365760. Throughput: 0: 959.0. Samples: 339796. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:49:05,734][00556] Avg episode reward: [(0, '4.803')] +[2024-10-20 17:49:10,732][00556] Fps is (10 sec: 4096.5, 60 sec: 4028.3, 300 sec: 3860.0). Total num frames: 5390336. Throughput: 0: 997.6. Samples: 346846. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:49:10,738][00556] Avg episode reward: [(0, '4.814')] +[2024-10-20 17:49:11,859][13107] Updated weights for policy 0, policy_version 1318 (0.0016) +[2024-10-20 17:49:15,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3860.0). Total num frames: 5410816. Throughput: 0: 1024.0. Samples: 350194. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:49:15,736][00556] Avg episode reward: [(0, '4.620')] +[2024-10-20 17:49:20,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 5427200. Throughput: 0: 968.3. Samples: 354512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:49:20,738][00556] Avg episode reward: [(0, '4.624')] +[2024-10-20 17:49:23,191][13107] Updated weights for policy 0, policy_version 1328 (0.0024) +[2024-10-20 17:49:25,732][00556] Fps is (10 sec: 3686.3, 60 sec: 3959.6, 300 sec: 3873.8). Total num frames: 5447680. Throughput: 0: 974.1. Samples: 361204. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:49:25,737][00556] Avg episode reward: [(0, '4.872')] +[2024-10-20 17:49:30,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5468160. Throughput: 0: 1006.0. Samples: 364748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:49:30,734][00556] Avg episode reward: [(0, '4.775')] +[2024-10-20 17:49:33,580][13107] Updated weights for policy 0, policy_version 1338 (0.0028) +[2024-10-20 17:49:35,732][00556] Fps is (10 sec: 3686.5, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5484544. Throughput: 0: 985.7. Samples: 369750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:49:35,735][00556] Avg episode reward: [(0, '4.776')] +[2024-10-20 17:49:40,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3873.8). Total num frames: 5505024. Throughput: 0: 961.6. Samples: 375712. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:49:40,737][00556] Avg episode reward: [(0, '5.088')] +[2024-10-20 17:49:43,592][13107] Updated weights for policy 0, policy_version 1348 (0.0027) +[2024-10-20 17:49:45,732][00556] Fps is (10 sec: 4505.5, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 5529600. Throughput: 0: 987.1. Samples: 379188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:49:45,734][00556] Avg episode reward: [(0, '5.245')] +[2024-10-20 17:49:50,733][00556] Fps is (10 sec: 4095.6, 60 sec: 3959.4, 300 sec: 3860.0). Total num frames: 5545984. Throughput: 0: 1010.1. Samples: 385250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:49:50,739][00556] Avg episode reward: [(0, '5.274')] +[2024-10-20 17:49:54,744][13107] Updated weights for policy 0, policy_version 1358 (0.0030) +[2024-10-20 17:49:55,733][00556] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5562368. Throughput: 0: 962.5. Samples: 390160. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:49:55,737][00556] Avg episode reward: [(0, '5.287')] +[2024-10-20 17:50:00,732][00556] Fps is (10 sec: 4096.4, 60 sec: 3959.6, 300 sec: 3887.8). Total num frames: 5586944. Throughput: 0: 968.8. Samples: 393792. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:50:00,737][00556] Avg episode reward: [(0, '5.399')] +[2024-10-20 17:50:03,384][13107] Updated weights for policy 0, policy_version 1368 (0.0027) +[2024-10-20 17:50:05,732][00556] Fps is (10 sec: 4505.8, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 5607424. Throughput: 0: 1029.7. Samples: 400850. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:50:05,735][00556] Avg episode reward: [(0, '5.498')] +[2024-10-20 17:50:05,742][13094] Saving new best policy, reward=5.498! +[2024-10-20 17:50:10,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 5623808. Throughput: 0: 973.8. Samples: 405024. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:10,736][00556] Avg episode reward: [(0, '5.584')] +[2024-10-20 17:50:10,741][13094] Saving new best policy, reward=5.584! +[2024-10-20 17:50:15,055][13107] Updated weights for policy 0, policy_version 1378 (0.0025) +[2024-10-20 17:50:15,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 5644288. Throughput: 0: 965.0. Samples: 408172. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:15,736][00556] Avg episode reward: [(0, '5.867')] +[2024-10-20 17:50:15,746][13094] Saving new best policy, reward=5.867! +[2024-10-20 17:50:20,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 5668864. Throughput: 0: 1009.1. Samples: 415160. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:20,739][00556] Avg episode reward: [(0, '6.243')] +[2024-10-20 17:50:20,741][13094] Saving new best policy, reward=6.243! +[2024-10-20 17:50:25,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 5681152. Throughput: 0: 985.4. Samples: 420054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:25,735][00556] Avg episode reward: [(0, '6.490')] +[2024-10-20 17:50:25,746][13094] Saving new best policy, reward=6.490! +[2024-10-20 17:50:26,103][13107] Updated weights for policy 0, policy_version 1388 (0.0044) +[2024-10-20 17:50:30,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 5701632. Throughput: 0: 952.6. Samples: 422056. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:30,735][00556] Avg episode reward: [(0, '6.226')] +[2024-10-20 17:50:35,566][13107] Updated weights for policy 0, policy_version 1398 (0.0033) +[2024-10-20 17:50:35,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 5726208. Throughput: 0: 973.4. Samples: 429050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:35,736][00556] Avg episode reward: [(0, '6.557')] +[2024-10-20 17:50:35,747][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001398_5726208.pth... +[2024-10-20 17:50:35,871][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001169_4788224.pth +[2024-10-20 17:50:35,892][13094] Saving new best policy, reward=6.557! +[2024-10-20 17:50:40,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 5742592. Throughput: 0: 995.5. Samples: 434958. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:40,735][00556] Avg episode reward: [(0, '6.296')] +[2024-10-20 17:50:45,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3887.7). Total num frames: 5758976. Throughput: 0: 960.9. Samples: 437034. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:50:45,735][00556] Avg episode reward: [(0, '6.602')] +[2024-10-20 17:50:45,745][13094] Saving new best policy, reward=6.602! +[2024-10-20 17:50:47,598][13107] Updated weights for policy 0, policy_version 1408 (0.0021) +[2024-10-20 17:50:50,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 5779456. Throughput: 0: 937.5. Samples: 443036. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:50:50,734][00556] Avg episode reward: [(0, '6.584')] +[2024-10-20 17:50:55,734][00556] Fps is (10 sec: 4504.9, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 5804032. Throughput: 0: 998.8. Samples: 449972. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:50:55,737][00556] Avg episode reward: [(0, '6.490')] +[2024-10-20 17:50:56,998][13107] Updated weights for policy 0, policy_version 1418 (0.0025) +[2024-10-20 17:51:00,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 5816320. Throughput: 0: 974.4. Samples: 452022. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:51:00,743][00556] Avg episode reward: [(0, '6.575')] +[2024-10-20 17:51:05,732][00556] Fps is (10 sec: 3277.2, 60 sec: 3822.9, 300 sec: 3901.7). Total num frames: 5836800. Throughput: 0: 930.6. Samples: 457038. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:51:05,735][00556] Avg episode reward: [(0, '6.107')] +[2024-10-20 17:51:08,026][13107] Updated weights for policy 0, policy_version 1428 (0.0030) +[2024-10-20 17:51:10,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 5861376. Throughput: 0: 977.5. Samples: 464040. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:51:10,735][00556] Avg episode reward: [(0, '5.802')] +[2024-10-20 17:51:15,732][00556] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 5877760. Throughput: 0: 1003.8. Samples: 467228. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:51:15,737][00556] Avg episode reward: [(0, '5.971')] +[2024-10-20 17:51:19,422][13107] Updated weights for policy 0, policy_version 1438 (0.0018) +[2024-10-20 17:51:20,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3887.7). Total num frames: 5894144. Throughput: 0: 942.0. Samples: 471438. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:51:20,735][00556] Avg episode reward: [(0, '6.567')] +[2024-10-20 17:51:25,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 5914624. Throughput: 0: 961.2. Samples: 478214. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:51:25,735][00556] Avg episode reward: [(0, '6.978')] +[2024-10-20 17:51:25,744][13094] Saving new best policy, reward=6.978! +[2024-10-20 17:51:28,766][13107] Updated weights for policy 0, policy_version 1448 (0.0013) +[2024-10-20 17:51:30,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 5935104. Throughput: 0: 986.1. Samples: 481408. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:51:30,740][00556] Avg episode reward: [(0, '7.457')] +[2024-10-20 17:51:30,745][13094] Saving new best policy, reward=7.457! +[2024-10-20 17:51:35,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3887.8). Total num frames: 5951488. Throughput: 0: 959.6. Samples: 486220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:51:35,737][00556] Avg episode reward: [(0, '7.749')] +[2024-10-20 17:51:35,744][13094] Saving new best policy, reward=7.749! +[2024-10-20 17:51:40,097][13107] Updated weights for policy 0, policy_version 1458 (0.0026) +[2024-10-20 17:51:40,733][00556] Fps is (10 sec: 3686.2, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 5971968. Throughput: 0: 939.0. Samples: 492224. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:51:40,738][00556] Avg episode reward: [(0, '7.325')] +[2024-10-20 17:51:45,732][00556] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 5996544. Throughput: 0: 973.1. Samples: 495810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:51:45,739][00556] Avg episode reward: [(0, '6.764')] +[2024-10-20 17:51:50,434][13107] Updated weights for policy 0, policy_version 1468 (0.0015) +[2024-10-20 17:51:50,732][00556] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 6012928. Throughput: 0: 990.3. Samples: 501600. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:51:50,736][00556] Avg episode reward: [(0, '7.142')] +[2024-10-20 17:51:55,734][00556] Fps is (10 sec: 3276.1, 60 sec: 3754.6, 300 sec: 3901.7). Total num frames: 6029312. Throughput: 0: 944.0. Samples: 506520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:51:55,740][00556] Avg episode reward: [(0, '7.097')] +[2024-10-20 17:52:00,570][13107] Updated weights for policy 0, policy_version 1478 (0.0019) +[2024-10-20 17:52:00,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 6053888. Throughput: 0: 947.4. Samples: 509860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:52:00,735][00556] Avg episode reward: [(0, '7.629')] +[2024-10-20 17:52:05,732][00556] Fps is (10 sec: 4096.8, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 6070272. Throughput: 0: 1003.6. Samples: 516600. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:52:05,736][00556] Avg episode reward: [(0, '8.297')] +[2024-10-20 17:52:05,750][13094] Saving new best policy, reward=8.297! +[2024-10-20 17:52:10,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 6086656. Throughput: 0: 945.0. Samples: 520738. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:52:10,738][00556] Avg episode reward: [(0, '8.526')] +[2024-10-20 17:52:10,743][13094] Saving new best policy, reward=8.526! +[2024-10-20 17:52:12,390][13107] Updated weights for policy 0, policy_version 1488 (0.0021) +[2024-10-20 17:52:15,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 6107136. Throughput: 0: 945.6. Samples: 523962. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:52:15,741][00556] Avg episode reward: [(0, '8.491')] +[2024-10-20 17:52:20,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 6131712. Throughput: 0: 995.4. Samples: 531014. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:52:20,740][00556] Avg episode reward: [(0, '8.152')] +[2024-10-20 17:52:21,240][13107] Updated weights for policy 0, policy_version 1498 (0.0029) +[2024-10-20 17:52:25,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 6148096. Throughput: 0: 970.7. Samples: 535904. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:52:25,737][00556] Avg episode reward: [(0, '8.798')] +[2024-10-20 17:52:25,751][13094] Saving new best policy, reward=8.798! +[2024-10-20 17:52:30,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 6164480. Throughput: 0: 939.6. Samples: 538094. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:52:30,740][00556] Avg episode reward: [(0, '8.482')] +[2024-10-20 17:52:32,719][13107] Updated weights for policy 0, policy_version 1508 (0.0032) +[2024-10-20 17:52:35,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 6189056. Throughput: 0: 968.9. Samples: 545202. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:52:35,735][00556] Avg episode reward: [(0, '9.598')] +[2024-10-20 17:52:35,746][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001511_6189056.pth... +[2024-10-20 17:52:35,868][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001282_5251072.pth +[2024-10-20 17:52:35,885][13094] Saving new best policy, reward=9.598! +[2024-10-20 17:52:40,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 6205440. Throughput: 0: 987.4. Samples: 550952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:52:40,735][00556] Avg episode reward: [(0, '9.612')] +[2024-10-20 17:52:40,741][13094] Saving new best policy, reward=9.612! +[2024-10-20 17:52:44,111][13107] Updated weights for policy 0, policy_version 1518 (0.0016) +[2024-10-20 17:52:45,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 6221824. Throughput: 0: 957.3. Samples: 552938. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:52:45,737][00556] Avg episode reward: [(0, '10.551')] +[2024-10-20 17:52:45,745][13094] Saving new best policy, reward=10.551! +[2024-10-20 17:52:50,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 6246400. Throughput: 0: 944.1. Samples: 559084. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:52:50,734][00556] Avg episode reward: [(0, '10.450')] +[2024-10-20 17:52:53,445][13107] Updated weights for policy 0, policy_version 1528 (0.0016) +[2024-10-20 17:52:55,733][00556] Fps is (10 sec: 4505.4, 60 sec: 3959.6, 300 sec: 3915.5). Total num frames: 6266880. Throughput: 0: 1005.5. Samples: 565988. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:52:55,738][00556] Avg episode reward: [(0, '9.111')] +[2024-10-20 17:53:00,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3887.7). Total num frames: 6279168. Throughput: 0: 978.0. Samples: 567972. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:53:00,736][00556] Avg episode reward: [(0, '8.549')] +[2024-10-20 17:53:05,162][13107] Updated weights for policy 0, policy_version 1538 (0.0024) +[2024-10-20 17:53:05,732][00556] Fps is (10 sec: 3276.9, 60 sec: 3822.9, 300 sec: 3901.7). Total num frames: 6299648. Throughput: 0: 935.6. Samples: 573114. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:53:05,737][00556] Avg episode reward: [(0, '9.168')] +[2024-10-20 17:53:10,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 6324224. Throughput: 0: 984.1. Samples: 580190. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:53:10,740][00556] Avg episode reward: [(0, '9.790')] +[2024-10-20 17:53:14,574][13107] Updated weights for policy 0, policy_version 1548 (0.0021) +[2024-10-20 17:53:15,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 6340608. Throughput: 0: 1005.1. Samples: 583322. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:53:15,735][00556] Avg episode reward: [(0, '10.452')] +[2024-10-20 17:53:20,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3887.8). Total num frames: 6356992. Throughput: 0: 942.8. Samples: 587628. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:53:20,740][00556] Avg episode reward: [(0, '11.112')] +[2024-10-20 17:53:20,742][13094] Saving new best policy, reward=11.112! +[2024-10-20 17:53:25,139][13107] Updated weights for policy 0, policy_version 1558 (0.0041) +[2024-10-20 17:53:25,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 6381568. Throughput: 0: 971.9. Samples: 594686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:53:25,734][00556] Avg episode reward: [(0, '11.859')] +[2024-10-20 17:53:25,757][13094] Saving new best policy, reward=11.859! +[2024-10-20 17:53:30,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 6402048. Throughput: 0: 999.8. Samples: 597928. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:53:30,740][00556] Avg episode reward: [(0, '11.930')] +[2024-10-20 17:53:30,743][13094] Saving new best policy, reward=11.930! +[2024-10-20 17:53:35,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3873.9). Total num frames: 6414336. Throughput: 0: 966.6. Samples: 602582. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:53:35,738][00556] Avg episode reward: [(0, '10.570')] +[2024-10-20 17:53:37,140][13107] Updated weights for policy 0, policy_version 1568 (0.0035) +[2024-10-20 17:53:40,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 6438912. Throughput: 0: 945.2. Samples: 608520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:53:40,742][00556] Avg episode reward: [(0, '10.837')] +[2024-10-20 17:53:45,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 6459392. Throughput: 0: 977.3. Samples: 611952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:53:45,740][00556] Avg episode reward: [(0, '11.957')] +[2024-10-20 17:53:45,751][13094] Saving new best policy, reward=11.957! +[2024-10-20 17:53:45,988][13107] Updated weights for policy 0, policy_version 1578 (0.0023) +[2024-10-20 17:53:50,734][00556] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3873.8). Total num frames: 6475776. Throughput: 0: 992.8. Samples: 617792. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:53:50,743][00556] Avg episode reward: [(0, '12.707')] +[2024-10-20 17:53:50,745][13094] Saving new best policy, reward=12.707! +[2024-10-20 17:53:55,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3873.9). Total num frames: 6492160. Throughput: 0: 940.9. Samples: 622530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:53:55,734][00556] Avg episode reward: [(0, '13.675')] +[2024-10-20 17:53:55,755][13094] Saving new best policy, reward=13.675! +[2024-10-20 17:53:57,683][13107] Updated weights for policy 0, policy_version 1588 (0.0013) +[2024-10-20 17:54:00,735][00556] Fps is (10 sec: 4095.6, 60 sec: 3959.2, 300 sec: 3901.6). Total num frames: 6516736. Throughput: 0: 945.4. Samples: 625866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:54:00,737][00556] Avg episode reward: [(0, '13.761')] +[2024-10-20 17:54:00,741][13094] Saving new best policy, reward=13.761! +[2024-10-20 17:54:05,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 6537216. Throughput: 0: 997.6. Samples: 632522. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:54:05,738][00556] Avg episode reward: [(0, '13.863')] +[2024-10-20 17:54:05,756][13094] Saving new best policy, reward=13.863! +[2024-10-20 17:54:08,541][13107] Updated weights for policy 0, policy_version 1598 (0.0036) +[2024-10-20 17:54:10,732][00556] Fps is (10 sec: 3277.8, 60 sec: 3754.6, 300 sec: 3860.0). Total num frames: 6549504. Throughput: 0: 932.1. Samples: 636632. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:54:10,735][00556] Avg episode reward: [(0, '14.218')] +[2024-10-20 17:54:10,737][13094] Saving new best policy, reward=14.218! +[2024-10-20 17:54:15,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 6569984. Throughput: 0: 930.4. Samples: 639794. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:54:15,734][00556] Avg episode reward: [(0, '14.220')] +[2024-10-20 17:54:15,747][13094] Saving new best policy, reward=14.220! +[2024-10-20 17:54:18,422][13107] Updated weights for policy 0, policy_version 1608 (0.0026) +[2024-10-20 17:54:20,732][00556] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 6594560. Throughput: 0: 982.0. Samples: 646774. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:54:20,734][00556] Avg episode reward: [(0, '14.512')] +[2024-10-20 17:54:20,739][13094] Saving new best policy, reward=14.512! +[2024-10-20 17:54:25,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 6610944. Throughput: 0: 959.8. Samples: 651712. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:54:25,739][00556] Avg episode reward: [(0, '14.462')] +[2024-10-20 17:54:30,013][13107] Updated weights for policy 0, policy_version 1618 (0.0028) +[2024-10-20 17:54:30,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 6627328. Throughput: 0: 932.1. Samples: 653896. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:54:30,736][00556] Avg episode reward: [(0, '15.060')] +[2024-10-20 17:54:30,740][13094] Saving new best policy, reward=15.060! +[2024-10-20 17:54:35,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 6651904. Throughput: 0: 958.6. Samples: 660928. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:54:35,738][00556] Avg episode reward: [(0, '14.106')] +[2024-10-20 17:54:35,750][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001624_6651904.pth... +[2024-10-20 17:54:35,903][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001398_5726208.pth +[2024-10-20 17:54:39,475][13107] Updated weights for policy 0, policy_version 1628 (0.0018) +[2024-10-20 17:54:40,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 6668288. Throughput: 0: 980.0. Samples: 666632. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:54:40,735][00556] Avg episode reward: [(0, '14.228')] +[2024-10-20 17:54:45,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 6684672. Throughput: 0: 950.9. Samples: 668652. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:54:45,734][00556] Avg episode reward: [(0, '13.322')] +[2024-10-20 17:54:50,415][13107] Updated weights for policy 0, policy_version 1638 (0.0023) +[2024-10-20 17:54:50,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.4, 300 sec: 3887.7). Total num frames: 6709248. Throughput: 0: 948.8. Samples: 675220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:54:50,740][00556] Avg episode reward: [(0, '13.764')] +[2024-10-20 17:54:55,738][00556] Fps is (10 sec: 4503.0, 60 sec: 3959.1, 300 sec: 3873.8). Total num frames: 6729728. Throughput: 0: 1006.0. Samples: 681908. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:54:55,745][00556] Avg episode reward: [(0, '13.888')] +[2024-10-20 17:55:00,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3823.2, 300 sec: 3860.0). Total num frames: 6746112. Throughput: 0: 982.0. Samples: 683982. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:55:00,737][00556] Avg episode reward: [(0, '13.959')] +[2024-10-20 17:55:01,903][13107] Updated weights for policy 0, policy_version 1648 (0.0035) +[2024-10-20 17:55:05,732][00556] Fps is (10 sec: 3688.5, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 6766592. Throughput: 0: 949.7. Samples: 689510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:55:05,735][00556] Avg episode reward: [(0, '15.279')] +[2024-10-20 17:55:05,745][13094] Saving new best policy, reward=15.279! +[2024-10-20 17:55:10,588][13107] Updated weights for policy 0, policy_version 1658 (0.0026) +[2024-10-20 17:55:10,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3887.7). Total num frames: 6791168. Throughput: 0: 996.8. Samples: 696568. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:10,737][00556] Avg episode reward: [(0, '15.888')] +[2024-10-20 17:55:10,741][13094] Saving new best policy, reward=15.888! +[2024-10-20 17:55:15,732][00556] Fps is (10 sec: 4095.9, 60 sec: 3959.4, 300 sec: 3860.0). Total num frames: 6807552. Throughput: 0: 1008.4. Samples: 699274. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:15,737][00556] Avg episode reward: [(0, '16.846')] +[2024-10-20 17:55:15,747][13094] Saving new best policy, reward=16.846! +[2024-10-20 17:55:20,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 6823936. Throughput: 0: 947.6. Samples: 703570. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:55:20,735][00556] Avg episode reward: [(0, '17.097')] +[2024-10-20 17:55:20,739][13094] Saving new best policy, reward=17.097! +[2024-10-20 17:55:22,349][13107] Updated weights for policy 0, policy_version 1668 (0.0036) +[2024-10-20 17:55:25,732][00556] Fps is (10 sec: 3686.5, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 6844416. Throughput: 0: 977.3. Samples: 710610. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:25,742][00556] Avg episode reward: [(0, '17.080')] +[2024-10-20 17:55:30,733][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 6864896. Throughput: 0: 1010.6. Samples: 714128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:30,739][00556] Avg episode reward: [(0, '18.704')] +[2024-10-20 17:55:30,741][13094] Saving new best policy, reward=18.704! +[2024-10-20 17:55:32,953][13107] Updated weights for policy 0, policy_version 1678 (0.0026) +[2024-10-20 17:55:35,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6877184. Throughput: 0: 959.3. Samples: 718390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:35,735][00556] Avg episode reward: [(0, '18.853')] +[2024-10-20 17:55:35,748][13094] Saving new best policy, reward=18.853! +[2024-10-20 17:55:40,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 6901760. Throughput: 0: 949.7. Samples: 724638. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:55:40,734][00556] Avg episode reward: [(0, '19.383')] +[2024-10-20 17:55:40,741][13094] Saving new best policy, reward=19.383! +[2024-10-20 17:55:43,097][13107] Updated weights for policy 0, policy_version 1688 (0.0018) +[2024-10-20 17:55:45,732][00556] Fps is (10 sec: 4915.1, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 6926336. Throughput: 0: 977.9. Samples: 727986. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:45,738][00556] Avg episode reward: [(0, '19.203')] +[2024-10-20 17:55:50,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6938624. Throughput: 0: 976.5. Samples: 733454. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:55:50,738][00556] Avg episode reward: [(0, '18.951')] +[2024-10-20 17:55:54,498][13107] Updated weights for policy 0, policy_version 1698 (0.0033) +[2024-10-20 17:55:55,732][00556] Fps is (10 sec: 3276.9, 60 sec: 3823.3, 300 sec: 3873.8). Total num frames: 6959104. Throughput: 0: 937.4. Samples: 738750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:55:55,738][00556] Avg episode reward: [(0, '17.945')] +[2024-10-20 17:56:00,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 6983680. Throughput: 0: 955.2. Samples: 742256. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:56:00,735][00556] Avg episode reward: [(0, '17.158')] +[2024-10-20 17:56:03,638][13107] Updated weights for policy 0, policy_version 1708 (0.0025) +[2024-10-20 17:56:05,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7000064. Throughput: 0: 997.9. Samples: 748474. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-10-20 17:56:05,735][00556] Avg episode reward: [(0, '15.689')] +[2024-10-20 17:56:10,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 7016448. Throughput: 0: 940.0. Samples: 752908. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-10-20 17:56:10,736][00556] Avg episode reward: [(0, '16.469')] +[2024-10-20 17:56:15,045][13107] Updated weights for policy 0, policy_version 1718 (0.0025) +[2024-10-20 17:56:15,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 7036928. Throughput: 0: 938.5. Samples: 756362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:56:15,738][00556] Avg episode reward: [(0, '16.657')] +[2024-10-20 17:56:20,732][00556] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 7061504. Throughput: 0: 1001.6. Samples: 763460. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:56:20,744][00556] Avg episode reward: [(0, '16.841')] +[2024-10-20 17:56:25,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 7073792. Throughput: 0: 962.0. Samples: 767928. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:56:25,738][00556] Avg episode reward: [(0, '17.006')] +[2024-10-20 17:56:26,322][13107] Updated weights for policy 0, policy_version 1728 (0.0025) +[2024-10-20 17:56:30,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 7094272. Throughput: 0: 950.5. Samples: 770758. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:56:30,734][00556] Avg episode reward: [(0, '17.406')] +[2024-10-20 17:56:35,323][13107] Updated weights for policy 0, policy_version 1738 (0.0018) +[2024-10-20 17:56:35,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 7118848. Throughput: 0: 983.1. Samples: 777694. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:56:35,735][00556] Avg episode reward: [(0, '18.076')] +[2024-10-20 17:56:35,747][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001738_7118848.pth... +[2024-10-20 17:56:35,904][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001511_6189056.pth +[2024-10-20 17:56:40,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7135232. Throughput: 0: 983.0. Samples: 782986. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:56:40,734][00556] Avg episode reward: [(0, '18.765')] +[2024-10-20 17:56:45,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 7151616. Throughput: 0: 953.2. Samples: 785150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:56:45,741][00556] Avg episode reward: [(0, '18.470')] +[2024-10-20 17:56:46,893][13107] Updated weights for policy 0, policy_version 1748 (0.0041) +[2024-10-20 17:56:50,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3887.8). Total num frames: 7176192. Throughput: 0: 969.6. Samples: 792106. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:56:50,734][00556] Avg episode reward: [(0, '18.238')] +[2024-10-20 17:56:55,732][00556] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 7196672. Throughput: 0: 1013.0. Samples: 798494. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:56:55,736][00556] Avg episode reward: [(0, '17.947')] +[2024-10-20 17:56:56,476][13107] Updated weights for policy 0, policy_version 1758 (0.0029) +[2024-10-20 17:57:00,735][00556] Fps is (10 sec: 3276.0, 60 sec: 3754.5, 300 sec: 3859.9). Total num frames: 7208960. Throughput: 0: 982.5. Samples: 800576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:57:00,741][00556] Avg episode reward: [(0, '17.435')] +[2024-10-20 17:57:05,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 7233536. Throughput: 0: 953.6. Samples: 806374. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:57:05,735][00556] Avg episode reward: [(0, '16.878')] +[2024-10-20 17:57:07,120][13107] Updated weights for policy 0, policy_version 1768 (0.0029) +[2024-10-20 17:57:10,732][00556] Fps is (10 sec: 4916.4, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 7258112. Throughput: 0: 1013.1. Samples: 813516. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:57:10,735][00556] Avg episode reward: [(0, '16.118')] +[2024-10-20 17:57:15,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7270400. Throughput: 0: 1005.9. Samples: 816024. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:57:15,734][00556] Avg episode reward: [(0, '16.678')] +[2024-10-20 17:57:18,428][13107] Updated weights for policy 0, policy_version 1778 (0.0031) +[2024-10-20 17:57:20,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 7290880. Throughput: 0: 958.8. Samples: 820838. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-10-20 17:57:20,734][00556] Avg episode reward: [(0, '16.217')] +[2024-10-20 17:57:25,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 7315456. Throughput: 0: 1001.6. Samples: 828056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:57:25,735][00556] Avg episode reward: [(0, '17.508')] +[2024-10-20 17:57:27,042][13107] Updated weights for policy 0, policy_version 1788 (0.0016) +[2024-10-20 17:57:30,732][00556] Fps is (10 sec: 4505.4, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 7335936. Throughput: 0: 1028.8. Samples: 831446. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:57:30,735][00556] Avg episode reward: [(0, '18.528')] +[2024-10-20 17:57:35,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 7348224. Throughput: 0: 967.3. Samples: 835636. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:57:35,734][00556] Avg episode reward: [(0, '19.123')] +[2024-10-20 17:57:38,739][13107] Updated weights for policy 0, policy_version 1798 (0.0034) +[2024-10-20 17:57:40,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3959.4, 300 sec: 3901.6). Total num frames: 7372800. Throughput: 0: 972.8. Samples: 842272. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:57:40,739][00556] Avg episode reward: [(0, '19.286')] +[2024-10-20 17:57:45,732][00556] Fps is (10 sec: 4915.1, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 7397376. Throughput: 0: 1007.3. Samples: 845900. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:57:45,739][00556] Avg episode reward: [(0, '17.974')] +[2024-10-20 17:57:48,262][13107] Updated weights for policy 0, policy_version 1808 (0.0019) +[2024-10-20 17:57:50,740][00556] Fps is (10 sec: 3683.8, 60 sec: 3890.7, 300 sec: 3873.8). Total num frames: 7409664. Throughput: 0: 996.2. Samples: 851212. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:57:50,743][00556] Avg episode reward: [(0, '18.386')] +[2024-10-20 17:57:55,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 7430144. Throughput: 0: 962.8. Samples: 856844. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:57:55,735][00556] Avg episode reward: [(0, '18.999')] +[2024-10-20 17:57:58,586][13107] Updated weights for policy 0, policy_version 1818 (0.0019) +[2024-10-20 17:58:00,732][00556] Fps is (10 sec: 4508.9, 60 sec: 4096.2, 300 sec: 3915.5). Total num frames: 7454720. Throughput: 0: 987.2. Samples: 860446. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:58:00,735][00556] Avg episode reward: [(0, '19.445')] +[2024-10-20 17:58:00,738][13094] Saving new best policy, reward=19.445! +[2024-10-20 17:58:05,732][00556] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 7471104. Throughput: 0: 1015.7. Samples: 866546. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:58:05,734][00556] Avg episode reward: [(0, '19.949')] +[2024-10-20 17:58:05,745][13094] Saving new best policy, reward=19.949! +[2024-10-20 17:58:10,105][13107] Updated weights for policy 0, policy_version 1828 (0.0031) +[2024-10-20 17:58:10,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 7487488. Throughput: 0: 957.6. Samples: 871146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:58:10,734][00556] Avg episode reward: [(0, '20.576')] +[2024-10-20 17:58:10,737][13094] Saving new best policy, reward=20.576! +[2024-10-20 17:58:15,732][00556] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 7512064. Throughput: 0: 958.5. Samples: 874576. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:58:15,735][00556] Avg episode reward: [(0, '20.085')] +[2024-10-20 17:58:18,915][13107] Updated weights for policy 0, policy_version 1838 (0.0021) +[2024-10-20 17:58:20,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 7532544. Throughput: 0: 1025.1. Samples: 881764. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:58:20,738][00556] Avg episode reward: [(0, '18.821')] +[2024-10-20 17:58:25,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 7548928. Throughput: 0: 974.8. Samples: 886138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:58:25,736][00556] Avg episode reward: [(0, '17.704')] +[2024-10-20 17:58:30,314][13107] Updated weights for policy 0, policy_version 1848 (0.0040) +[2024-10-20 17:58:30,733][00556] Fps is (10 sec: 3686.1, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 7569408. Throughput: 0: 961.9. Samples: 889184. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 17:58:30,738][00556] Avg episode reward: [(0, '17.782')] +[2024-10-20 17:58:35,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 7593984. Throughput: 0: 999.7. Samples: 896190. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:58:35,739][00556] Avg episode reward: [(0, '19.275')] +[2024-10-20 17:58:35,751][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001854_7593984.pth... +[2024-10-20 17:58:35,903][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001624_6651904.pth +[2024-10-20 17:58:40,732][00556] Fps is (10 sec: 3686.7, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 7606272. Throughput: 0: 989.3. Samples: 901364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:58:40,734][00556] Avg episode reward: [(0, '20.032')] +[2024-10-20 17:58:40,955][13107] Updated weights for policy 0, policy_version 1858 (0.0018) +[2024-10-20 17:58:45,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 7626752. Throughput: 0: 958.1. Samples: 903562. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:58:45,736][00556] Avg episode reward: [(0, '20.103')] +[2024-10-20 17:58:50,329][13107] Updated weights for policy 0, policy_version 1868 (0.0031) +[2024-10-20 17:58:50,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4028.2, 300 sec: 3929.4). Total num frames: 7651328. Throughput: 0: 980.7. Samples: 910676. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:58:50,735][00556] Avg episode reward: [(0, '20.564')] +[2024-10-20 17:58:55,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 7671808. Throughput: 0: 1016.5. Samples: 916888. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:58:55,741][00556] Avg episode reward: [(0, '18.745')] +[2024-10-20 17:59:00,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 7684096. Throughput: 0: 987.6. Samples: 919018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:59:00,739][00556] Avg episode reward: [(0, '19.110')] +[2024-10-20 17:59:01,841][13107] Updated weights for policy 0, policy_version 1878 (0.0028) +[2024-10-20 17:59:05,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 7708672. Throughput: 0: 965.9. Samples: 925230. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:59:05,737][00556] Avg episode reward: [(0, '17.969')] +[2024-10-20 17:59:10,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 7729152. Throughput: 0: 1024.9. Samples: 932260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-10-20 17:59:10,738][00556] Avg episode reward: [(0, '19.657')] +[2024-10-20 17:59:10,817][13107] Updated weights for policy 0, policy_version 1888 (0.0020) +[2024-10-20 17:59:15,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 7745536. Throughput: 0: 1004.7. Samples: 934396. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:59:15,739][00556] Avg episode reward: [(0, '19.439')] +[2024-10-20 17:59:20,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 7766016. Throughput: 0: 965.9. Samples: 939656. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:59:20,738][00556] Avg episode reward: [(0, '21.145')] +[2024-10-20 17:59:20,744][13094] Saving new best policy, reward=21.145! +[2024-10-20 17:59:21,944][13107] Updated weights for policy 0, policy_version 1898 (0.0044) +[2024-10-20 17:59:25,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 7790592. Throughput: 0: 1008.4. Samples: 946744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:59:25,735][00556] Avg episode reward: [(0, '19.946')] +[2024-10-20 17:59:30,735][00556] Fps is (10 sec: 4095.0, 60 sec: 3959.4, 300 sec: 3915.5). Total num frames: 7806976. Throughput: 0: 1029.2. Samples: 949880. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:59:30,739][00556] Avg episode reward: [(0, '20.082')] +[2024-10-20 17:59:32,806][13107] Updated weights for policy 0, policy_version 1908 (0.0021) +[2024-10-20 17:59:35,732][00556] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 7823360. Throughput: 0: 963.6. Samples: 954040. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-10-20 17:59:35,739][00556] Avg episode reward: [(0, '19.578')] +[2024-10-20 17:59:40,732][00556] Fps is (10 sec: 4097.0, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 7847936. Throughput: 0: 982.9. Samples: 961118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:59:40,734][00556] Avg episode reward: [(0, '18.151')] +[2024-10-20 17:59:42,040][13107] Updated weights for policy 0, policy_version 1918 (0.0023) +[2024-10-20 17:59:45,732][00556] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 7868416. Throughput: 0: 1013.6. Samples: 964632. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-10-20 17:59:45,734][00556] Avg episode reward: [(0, '18.588')] +[2024-10-20 17:59:50,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3915.6). Total num frames: 7884800. Throughput: 0: 983.7. Samples: 969498. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:59:50,737][00556] Avg episode reward: [(0, '19.319')] +[2024-10-20 17:59:53,561][13107] Updated weights for policy 0, policy_version 1928 (0.0033) +[2024-10-20 17:59:55,732][00556] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 7905280. Throughput: 0: 960.5. Samples: 975482. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 17:59:55,735][00556] Avg episode reward: [(0, '18.140')] +[2024-10-20 18:00:00,732][00556] Fps is (10 sec: 4505.5, 60 sec: 4096.0, 300 sec: 3943.3). Total num frames: 7929856. Throughput: 0: 992.4. Samples: 979054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 18:00:00,739][00556] Avg episode reward: [(0, '19.109')] +[2024-10-20 18:00:02,625][13107] Updated weights for policy 0, policy_version 1938 (0.0030) +[2024-10-20 18:00:05,732][00556] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 7946240. Throughput: 0: 1004.9. Samples: 984876. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 18:00:05,739][00556] Avg episode reward: [(0, '19.511')] +[2024-10-20 18:00:10,732][00556] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 7962624. Throughput: 0: 960.2. Samples: 989954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-10-20 18:00:10,735][00556] Avg episode reward: [(0, '19.482')] +[2024-10-20 18:00:13,666][13107] Updated weights for policy 0, policy_version 1948 (0.0033) +[2024-10-20 18:00:15,732][00556] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 7987200. Throughput: 0: 971.3. Samples: 993584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-10-20 18:00:15,734][00556] Avg episode reward: [(0, '22.004')] +[2024-10-20 18:00:15,749][13094] Saving new best policy, reward=22.004! +[2024-10-20 18:00:19,992][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-10-20 18:00:20,004][13094] Stopping Batcher_0... +[2024-10-20 18:00:20,005][00556] Component Batcher_0 stopped! +[2024-10-20 18:00:20,006][13094] Loop batcher_evt_loop terminating... +[2024-10-20 18:00:20,088][13107] Weights refcount: 2 0 +[2024-10-20 18:00:20,095][00556] Component InferenceWorker_p0-w0 stopped! +[2024-10-20 18:00:20,100][13107] Stopping InferenceWorker_p0-w0... +[2024-10-20 18:00:20,100][13107] Loop inference_proc0-0_evt_loop terminating... +[2024-10-20 18:00:20,230][13094] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001738_7118848.pth +[2024-10-20 18:00:20,255][13094] Saving new best policy, reward=22.116! +[2024-10-20 18:00:20,486][13094] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-10-20 18:00:20,675][00556] Component LearnerWorker_p0 stopped! +[2024-10-20 18:00:20,681][13094] Stopping LearnerWorker_p0... +[2024-10-20 18:00:20,682][13094] Loop learner_proc0_evt_loop terminating... +[2024-10-20 18:00:20,747][00556] Component RolloutWorker_w6 stopped! +[2024-10-20 18:00:20,752][13114] Stopping RolloutWorker_w6... +[2024-10-20 18:00:20,753][13114] Loop rollout_proc6_evt_loop terminating... +[2024-10-20 18:00:20,809][00556] Component RolloutWorker_w2 stopped! +[2024-10-20 18:00:20,814][13110] Stopping RolloutWorker_w2... +[2024-10-20 18:00:20,815][13110] Loop rollout_proc2_evt_loop terminating... +[2024-10-20 18:00:20,859][00556] Component RolloutWorker_w1 stopped! +[2024-10-20 18:00:20,859][13109] Stopping RolloutWorker_w1... +[2024-10-20 18:00:20,864][13109] Loop rollout_proc1_evt_loop terminating... +[2024-10-20 18:00:20,875][00556] Component RolloutWorker_w5 stopped! +[2024-10-20 18:00:20,874][13112] Stopping RolloutWorker_w5... +[2024-10-20 18:00:20,878][13112] Loop rollout_proc5_evt_loop terminating... +[2024-10-20 18:00:20,898][00556] Component RolloutWorker_w4 stopped! +[2024-10-20 18:00:20,900][13113] Stopping RolloutWorker_w4... +[2024-10-20 18:00:20,901][13113] Loop rollout_proc4_evt_loop terminating... +[2024-10-20 18:00:20,903][00556] Component RolloutWorker_w0 stopped! +[2024-10-20 18:00:20,907][13108] Stopping RolloutWorker_w0... +[2024-10-20 18:00:20,913][13111] Stopping RolloutWorker_w3... +[2024-10-20 18:00:20,914][13111] Loop rollout_proc3_evt_loop terminating... +[2024-10-20 18:00:20,914][00556] Component RolloutWorker_w3 stopped! +[2024-10-20 18:00:20,921][00556] Component RolloutWorker_w7 stopped! +[2024-10-20 18:00:20,923][13115] Stopping RolloutWorker_w7... +[2024-10-20 18:00:20,924][00556] Waiting for process learner_proc0 to stop... +[2024-10-20 18:00:20,928][13108] Loop rollout_proc0_evt_loop terminating... +[2024-10-20 18:00:20,932][13115] Loop rollout_proc7_evt_loop terminating... +[2024-10-20 18:00:22,753][00556] Waiting for process inference_proc0-0 to join... +[2024-10-20 18:00:22,760][00556] Waiting for process rollout_proc0 to join... +[2024-10-20 18:00:25,123][00556] Waiting for process rollout_proc1 to join... +[2024-10-20 18:00:25,127][00556] Waiting for process rollout_proc2 to join... +[2024-10-20 18:00:25,130][00556] Waiting for process rollout_proc3 to join... +[2024-10-20 18:00:25,138][00556] Waiting for process rollout_proc4 to join... +[2024-10-20 18:00:25,142][00556] Waiting for process rollout_proc5 to join... +[2024-10-20 18:00:25,147][00556] Waiting for process rollout_proc6 to join... +[2024-10-20 18:00:25,151][00556] Waiting for process rollout_proc7 to join... +[2024-10-20 18:00:25,154][00556] Batcher 0 profile tree view: +batching: 27.0408, releasing_batches: 0.0257 +[2024-10-20 18:00:25,156][00556] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 403.2960 +update_model: 8.7968 + weight_update: 0.0044 +one_step: 0.0084 + handle_policy_step: 589.4419 + deserialize: 14.4878, stack: 2.9567, obs_to_device_normalize: 120.9362, forward: 314.2505, send_messages: 28.3503 + prepare_outputs: 80.5140 + to_cpu: 46.0632 +[2024-10-20 18:00:25,158][00556] Learner 0 profile tree view: +misc: 0.0048, prepare_batch: 13.1270 +train: 75.0890 + epoch_init: 0.0063, minibatch_init: 0.0099, losses_postprocess: 0.6750, kl_divergence: 0.6642, after_optimizer: 3.2268 + calculate_losses: 26.8753 + losses_init: 0.0032, forward_head: 1.4167, bptt_initial: 18.0554, tail: 1.1208, advantages_returns: 0.2937, losses: 3.7877 + bptt: 1.9114 + bptt_forward_core: 1.7981 + update: 42.9522 + clip: 0.8273 +[2024-10-20 18:00:25,159][00556] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3110, enqueue_policy_requests: 94.9974, env_step: 814.1256, overhead: 13.4528, complete_rollouts: 6.9011 +save_policy_outputs: 20.7421 + split_output_tensors: 8.0932 +[2024-10-20 18:00:25,163][00556] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4022, enqueue_policy_requests: 95.2253, env_step: 810.8649, overhead: 13.2098, complete_rollouts: 6.5797 +save_policy_outputs: 20.1699 + split_output_tensors: 8.2991 +[2024-10-20 18:00:25,165][00556] Loop Runner_EvtLoop terminating... +[2024-10-20 18:00:25,166][00556] Runner profile tree view: +main_loop: 1067.1793 +[2024-10-20 18:00:25,168][00556] Collected {0: 8007680}, FPS: 3749.9 +[2024-10-20 18:00:25,200][00556] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-10-20 18:00:25,201][00556] Overriding arg 'num_workers' with value 1 passed from command line +[2024-10-20 18:00:25,203][00556] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-10-20 18:00:25,205][00556] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-10-20 18:00:25,207][00556] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-10-20 18:00:25,208][00556] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-10-20 18:00:25,209][00556] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-10-20 18:00:25,211][00556] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-10-20 18:00:25,212][00556] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-10-20 18:00:25,213][00556] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-10-20 18:00:25,217][00556] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-10-20 18:00:25,220][00556] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-10-20 18:00:25,222][00556] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-10-20 18:00:25,224][00556] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-10-20 18:00:25,226][00556] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-10-20 18:00:25,255][00556] RunningMeanStd input shape: (3, 72, 128) +[2024-10-20 18:00:25,257][00556] RunningMeanStd input shape: (1,) +[2024-10-20 18:00:25,271][00556] ConvEncoder: input_channels=3 +[2024-10-20 18:00:25,313][00556] Conv encoder output size: 512 +[2024-10-20 18:00:25,315][00556] Policy head output size: 512 +[2024-10-20 18:00:25,333][00556] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-10-20 18:00:25,787][00556] Num frames 100... +[2024-10-20 18:00:25,909][00556] Num frames 200... +[2024-10-20 18:00:26,028][00556] Num frames 300... +[2024-10-20 18:00:26,162][00556] Num frames 400... +[2024-10-20 18:00:26,286][00556] Num frames 500... +[2024-10-20 18:00:26,346][00556] Avg episode rewards: #0: 9.010, true rewards: #0: 5.010 +[2024-10-20 18:00:26,348][00556] Avg episode reward: 9.010, avg true_objective: 5.010 +[2024-10-20 18:00:26,476][00556] Num frames 600... +[2024-10-20 18:00:26,601][00556] Num frames 700... +[2024-10-20 18:00:26,727][00556] Num frames 800... +[2024-10-20 18:00:26,854][00556] Num frames 900... +[2024-10-20 18:00:26,976][00556] Num frames 1000... +[2024-10-20 18:00:27,103][00556] Num frames 1100... +[2024-10-20 18:00:27,229][00556] Num frames 1200... +[2024-10-20 18:00:27,354][00556] Num frames 1300... +[2024-10-20 18:00:27,535][00556] Avg episode rewards: #0: 13.485, true rewards: #0: 6.985 +[2024-10-20 18:00:27,537][00556] Avg episode reward: 13.485, avg true_objective: 6.985 +[2024-10-20 18:00:27,546][00556] Num frames 1400... +[2024-10-20 18:00:27,692][00556] Num frames 1500... +[2024-10-20 18:00:27,813][00556] Num frames 1600... +[2024-10-20 18:00:27,936][00556] Num frames 1700... +[2024-10-20 18:00:28,057][00556] Num frames 1800... +[2024-10-20 18:00:28,189][00556] Num frames 1900... +[2024-10-20 18:00:28,317][00556] Num frames 2000... +[2024-10-20 18:00:28,446][00556] Num frames 2100... +[2024-10-20 18:00:28,567][00556] Num frames 2200... +[2024-10-20 18:00:28,689][00556] Num frames 2300... +[2024-10-20 18:00:28,810][00556] Num frames 2400... +[2024-10-20 18:00:28,929][00556] Avg episode rewards: #0: 17.177, true rewards: #0: 8.177 +[2024-10-20 18:00:28,930][00556] Avg episode reward: 17.177, avg true_objective: 8.177 +[2024-10-20 18:00:28,987][00556] Num frames 2500... +[2024-10-20 18:00:29,107][00556] Num frames 2600... +[2024-10-20 18:00:29,236][00556] Num frames 2700... +[2024-10-20 18:00:29,357][00556] Num frames 2800... +[2024-10-20 18:00:29,484][00556] Num frames 2900... +[2024-10-20 18:00:29,607][00556] Num frames 3000... +[2024-10-20 18:00:29,725][00556] Num frames 3100... +[2024-10-20 18:00:29,846][00556] Num frames 3200... +[2024-10-20 18:00:29,968][00556] Num frames 3300... +[2024-10-20 18:00:30,086][00556] Num frames 3400... +[2024-10-20 18:00:30,217][00556] Num frames 3500... +[2024-10-20 18:00:30,291][00556] Avg episode rewards: #0: 19.288, true rewards: #0: 8.787 +[2024-10-20 18:00:30,293][00556] Avg episode reward: 19.288, avg true_objective: 8.787 +[2024-10-20 18:00:30,405][00556] Num frames 3600... +[2024-10-20 18:00:30,535][00556] Num frames 3700... +[2024-10-20 18:00:30,657][00556] Num frames 3800... +[2024-10-20 18:00:30,787][00556] Num frames 3900... +[2024-10-20 18:00:30,913][00556] Num frames 4000... +[2024-10-20 18:00:31,038][00556] Num frames 4100... +[2024-10-20 18:00:31,161][00556] Avg episode rewards: #0: 18.310, true rewards: #0: 8.310 +[2024-10-20 18:00:31,163][00556] Avg episode reward: 18.310, avg true_objective: 8.310 +[2024-10-20 18:00:31,229][00556] Num frames 4200... +[2024-10-20 18:00:31,354][00556] Num frames 4300... +[2024-10-20 18:00:31,485][00556] Num frames 4400... +[2024-10-20 18:00:31,612][00556] Num frames 4500... +[2024-10-20 18:00:31,735][00556] Num frames 4600... +[2024-10-20 18:00:31,801][00556] Avg episode rewards: #0: 16.847, true rewards: #0: 7.680 +[2024-10-20 18:00:31,803][00556] Avg episode reward: 16.847, avg true_objective: 7.680 +[2024-10-20 18:00:31,919][00556] Num frames 4700... +[2024-10-20 18:00:32,041][00556] Num frames 4800... +[2024-10-20 18:00:32,199][00556] Num frames 4900... +[2024-10-20 18:00:32,329][00556] Num frames 5000... +[2024-10-20 18:00:32,464][00556] Num frames 5100... +[2024-10-20 18:00:32,589][00556] Num frames 5200... +[2024-10-20 18:00:32,718][00556] Num frames 5300... +[2024-10-20 18:00:32,844][00556] Num frames 5400... +[2024-10-20 18:00:32,966][00556] Num frames 5500... +[2024-10-20 18:00:33,090][00556] Num frames 5600... +[2024-10-20 18:00:33,214][00556] Num frames 5700... +[2024-10-20 18:00:33,362][00556] Num frames 5800... +[2024-10-20 18:00:33,532][00556] Avg episode rewards: #0: 18.983, true rewards: #0: 8.411 +[2024-10-20 18:00:33,534][00556] Avg episode reward: 18.983, avg true_objective: 8.411 +[2024-10-20 18:00:33,557][00556] Num frames 5900... +[2024-10-20 18:00:33,722][00556] Num frames 6000... +[2024-10-20 18:00:33,892][00556] Num frames 6100... +[2024-10-20 18:00:34,060][00556] Num frames 6200... +[2024-10-20 18:00:34,224][00556] Num frames 6300... +[2024-10-20 18:00:34,405][00556] Num frames 6400... +[2024-10-20 18:00:34,575][00556] Num frames 6500... +[2024-10-20 18:00:34,737][00556] Num frames 6600... +[2024-10-20 18:00:34,908][00556] Num frames 6700... +[2024-10-20 18:00:35,080][00556] Num frames 6800... +[2024-10-20 18:00:35,249][00556] Num frames 6900... +[2024-10-20 18:00:35,452][00556] Num frames 7000... +[2024-10-20 18:00:35,633][00556] Num frames 7100... +[2024-10-20 18:00:35,809][00556] Num frames 7200... +[2024-10-20 18:00:35,981][00556] Num frames 7300... +[2024-10-20 18:00:36,160][00556] Num frames 7400... +[2024-10-20 18:00:36,283][00556] Num frames 7500... +[2024-10-20 18:00:36,431][00556] Num frames 7600... +[2024-10-20 18:00:36,561][00556] Num frames 7700... +[2024-10-20 18:00:36,640][00556] Avg episode rewards: #0: 21.771, true rewards: #0: 9.646 +[2024-10-20 18:00:36,641][00556] Avg episode reward: 21.771, avg true_objective: 9.646 +[2024-10-20 18:00:36,745][00556] Num frames 7800... +[2024-10-20 18:00:36,867][00556] Num frames 7900... +[2024-10-20 18:00:36,994][00556] Num frames 8000... +[2024-10-20 18:00:37,116][00556] Num frames 8100... +[2024-10-20 18:00:37,240][00556] Num frames 8200... +[2024-10-20 18:00:37,364][00556] Num frames 8300... +[2024-10-20 18:00:37,507][00556] Num frames 8400... +[2024-10-20 18:00:37,636][00556] Num frames 8500... +[2024-10-20 18:00:37,759][00556] Num frames 8600... +[2024-10-20 18:00:37,883][00556] Num frames 8700... +[2024-10-20 18:00:38,009][00556] Num frames 8800... +[2024-10-20 18:00:38,129][00556] Num frames 8900... +[2024-10-20 18:00:38,279][00556] Avg episode rewards: #0: 22.527, true rewards: #0: 9.971 +[2024-10-20 18:00:38,280][00556] Avg episode reward: 22.527, avg true_objective: 9.971 +[2024-10-20 18:00:38,314][00556] Num frames 9000... +[2024-10-20 18:00:38,446][00556] Num frames 9100... +[2024-10-20 18:00:38,574][00556] Num frames 9200... +[2024-10-20 18:00:38,695][00556] Num frames 9300... +[2024-10-20 18:00:38,816][00556] Num frames 9400... +[2024-10-20 18:00:38,938][00556] Num frames 9500... +[2024-10-20 18:00:39,057][00556] Num frames 9600... +[2024-10-20 18:00:39,179][00556] Num frames 9700... +[2024-10-20 18:00:39,299][00556] Num frames 9800... +[2024-10-20 18:00:39,427][00556] Num frames 9900... +[2024-10-20 18:00:39,558][00556] Num frames 10000... +[2024-10-20 18:00:39,680][00556] Num frames 10100... +[2024-10-20 18:00:39,800][00556] Num frames 10200... +[2024-10-20 18:00:39,890][00556] Avg episode rewards: #0: 23.326, true rewards: #0: 10.226 +[2024-10-20 18:00:39,892][00556] Avg episode reward: 23.326, avg true_objective: 10.226 +[2024-10-20 18:01:43,312][00556] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-10-20 18:01:43,800][00556] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-10-20 18:01:43,802][00556] Overriding arg 'num_workers' with value 1 passed from command line +[2024-10-20 18:01:43,804][00556] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-10-20 18:01:43,806][00556] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-10-20 18:01:43,808][00556] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-10-20 18:01:43,810][00556] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-10-20 18:01:43,812][00556] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-10-20 18:01:43,813][00556] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-10-20 18:01:43,814][00556] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-10-20 18:01:43,815][00556] Adding new argument 'hf_repository'='jerryvc/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-10-20 18:01:43,818][00556] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-10-20 18:01:43,819][00556] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-10-20 18:01:43,820][00556] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-10-20 18:01:43,821][00556] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-10-20 18:01:43,822][00556] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-10-20 18:01:43,863][00556] RunningMeanStd input shape: (3, 72, 128) +[2024-10-20 18:01:43,866][00556] RunningMeanStd input shape: (1,) +[2024-10-20 18:01:43,884][00556] ConvEncoder: input_channels=3 +[2024-10-20 18:01:43,950][00556] Conv encoder output size: 512 +[2024-10-20 18:01:43,952][00556] Policy head output size: 512 +[2024-10-20 18:01:43,982][00556] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001955_8007680.pth... +[2024-10-20 18:01:44,618][00556] Num frames 100... +[2024-10-20 18:01:44,778][00556] Num frames 200... +[2024-10-20 18:01:44,937][00556] Num frames 300... +[2024-10-20 18:01:45,113][00556] Num frames 400... +[2024-10-20 18:01:45,273][00556] Num frames 500... +[2024-10-20 18:01:45,431][00556] Num frames 600... +[2024-10-20 18:01:45,596][00556] Num frames 700... +[2024-10-20 18:01:45,757][00556] Num frames 800... +[2024-10-20 18:01:45,917][00556] Num frames 900... +[2024-10-20 18:01:46,085][00556] Num frames 1000... +[2024-10-20 18:01:46,253][00556] Num frames 1100... +[2024-10-20 18:01:46,422][00556] Num frames 1200... +[2024-10-20 18:01:46,594][00556] Num frames 1300... +[2024-10-20 18:01:46,782][00556] Num frames 1400... +[2024-10-20 18:01:46,944][00556] Num frames 1500... +[2024-10-20 18:01:47,116][00556] Num frames 1600... +[2024-10-20 18:01:47,343][00556] Avg episode rewards: #0: 39.890, true rewards: #0: 16.890 +[2024-10-20 18:01:47,346][00556] Avg episode reward: 39.890, avg true_objective: 16.890 +[2024-10-20 18:01:47,365][00556] Num frames 1700... +[2024-10-20 18:01:47,545][00556] Num frames 1800... +[2024-10-20 18:01:47,728][00556] Num frames 1900... +[2024-10-20 18:01:47,938][00556] Num frames 2000... +[2024-10-20 18:01:48,127][00556] Num frames 2100... +[2024-10-20 18:01:48,317][00556] Num frames 2200... +[2024-10-20 18:01:48,518][00556] Num frames 2300... +[2024-10-20 18:01:48,703][00556] Num frames 2400... +[2024-10-20 18:01:48,886][00556] Num frames 2500... +[2024-10-20 18:01:49,074][00556] Num frames 2600... +[2024-10-20 18:01:49,217][00556] Avg episode rewards: #0: 28.745, true rewards: #0: 13.245 +[2024-10-20 18:01:49,219][00556] Avg episode reward: 28.745, avg true_objective: 13.245 +[2024-10-20 18:01:49,320][00556] Num frames 2700... +[2024-10-20 18:01:49,506][00556] Num frames 2800... +[2024-10-20 18:01:49,705][00556] Num frames 2900... +[2024-10-20 18:01:49,886][00556] Num frames 3000... +[2024-10-20 18:01:50,055][00556] Num frames 3100... +[2024-10-20 18:01:50,232][00556] Num frames 3200... +[2024-10-20 18:01:50,414][00556] Num frames 3300... +[2024-10-20 18:01:50,543][00556] Num frames 3400... +[2024-10-20 18:01:50,659][00556] Avg episode rewards: #0: 24.163, true rewards: #0: 11.497 +[2024-10-20 18:01:50,661][00556] Avg episode reward: 24.163, avg true_objective: 11.497 +[2024-10-20 18:01:50,725][00556] Num frames 3500... +[2024-10-20 18:01:50,847][00556] Num frames 3600... +[2024-10-20 18:01:50,972][00556] Num frames 3700... +[2024-10-20 18:01:51,096][00556] Num frames 3800... +[2024-10-20 18:01:51,217][00556] Num frames 3900... +[2024-10-20 18:01:51,347][00556] Num frames 4000... +[2024-10-20 18:01:51,478][00556] Num frames 4100... +[2024-10-20 18:01:51,600][00556] Num frames 4200... +[2024-10-20 18:01:51,723][00556] Num frames 4300... +[2024-10-20 18:01:51,846][00556] Num frames 4400... +[2024-10-20 18:01:51,964][00556] Num frames 4500... +[2024-10-20 18:01:52,085][00556] Num frames 4600... +[2024-10-20 18:01:52,144][00556] Avg episode rewards: #0: 24.753, true rewards: #0: 11.502 +[2024-10-20 18:01:52,146][00556] Avg episode reward: 24.753, avg true_objective: 11.502 +[2024-10-20 18:01:52,274][00556] Num frames 4700... +[2024-10-20 18:01:52,413][00556] Num frames 4800... +[2024-10-20 18:01:52,593][00556] Num frames 4900... +[2024-10-20 18:01:52,759][00556] Num frames 5000... +[2024-10-20 18:01:52,921][00556] Num frames 5100... +[2024-10-20 18:01:53,107][00556] Avg episode rewards: #0: 21.754, true rewards: #0: 10.354 +[2024-10-20 18:01:53,110][00556] Avg episode reward: 21.754, avg true_objective: 10.354 +[2024-10-20 18:01:53,154][00556] Num frames 5200... +[2024-10-20 18:01:53,315][00556] Num frames 5300... +[2024-10-20 18:01:53,489][00556] Num frames 5400... +[2024-10-20 18:01:53,658][00556] Num frames 5500... +[2024-10-20 18:01:53,830][00556] Num frames 5600... +[2024-10-20 18:01:54,001][00556] Num frames 5700... +[2024-10-20 18:01:54,175][00556] Num frames 5800... +[2024-10-20 18:01:54,353][00556] Num frames 5900... +[2024-10-20 18:01:54,537][00556] Num frames 6000... +[2024-10-20 18:01:54,667][00556] Avg episode rewards: #0: 20.735, true rewards: #0: 10.068 +[2024-10-20 18:01:54,669][00556] Avg episode reward: 20.735, avg true_objective: 10.068 +[2024-10-20 18:01:54,773][00556] Num frames 6100... +[2024-10-20 18:01:54,949][00556] Num frames 6200... +[2024-10-20 18:01:55,074][00556] Num frames 6300... +[2024-10-20 18:01:55,198][00556] Num frames 6400... +[2024-10-20 18:01:55,322][00556] Num frames 6500... +[2024-10-20 18:01:55,455][00556] Num frames 6600... +[2024-10-20 18:01:55,583][00556] Num frames 6700... +[2024-10-20 18:01:55,707][00556] Num frames 6800... +[2024-10-20 18:01:55,829][00556] Num frames 6900... +[2024-10-20 18:01:55,948][00556] Num frames 7000... +[2024-10-20 18:01:56,007][00556] Avg episode rewards: #0: 20.573, true rewards: #0: 10.001 +[2024-10-20 18:01:56,009][00556] Avg episode reward: 20.573, avg true_objective: 10.001 +[2024-10-20 18:01:56,133][00556] Num frames 7100... +[2024-10-20 18:01:56,254][00556] Num frames 7200... +[2024-10-20 18:01:56,376][00556] Num frames 7300... +[2024-10-20 18:01:56,516][00556] Num frames 7400... +[2024-10-20 18:01:56,638][00556] Num frames 7500... +[2024-10-20 18:01:56,759][00556] Num frames 7600... +[2024-10-20 18:01:56,882][00556] Num frames 7700... +[2024-10-20 18:01:57,009][00556] Num frames 7800... +[2024-10-20 18:01:57,149][00556] Num frames 7900... +[2024-10-20 18:01:57,327][00556] Num frames 8000... +[2024-10-20 18:01:57,504][00556] Num frames 8100... +[2024-10-20 18:01:57,656][00556] Num frames 8200... +[2024-10-20 18:01:57,792][00556] Num frames 8300... +[2024-10-20 18:01:57,919][00556] Num frames 8400... +[2024-10-20 18:01:58,028][00556] Avg episode rewards: #0: 22.301, true rewards: #0: 10.551 +[2024-10-20 18:01:58,030][00556] Avg episode reward: 22.301, avg true_objective: 10.551 +[2024-10-20 18:01:58,106][00556] Num frames 8500... +[2024-10-20 18:01:58,233][00556] Num frames 8600... +[2024-10-20 18:01:58,355][00556] Num frames 8700... +[2024-10-20 18:01:58,491][00556] Num frames 8800... +[2024-10-20 18:01:58,580][00556] Avg episode rewards: #0: 20.695, true rewards: #0: 9.806 +[2024-10-20 18:01:58,581][00556] Avg episode reward: 20.695, avg true_objective: 9.806 +[2024-10-20 18:01:58,676][00556] Num frames 8900... +[2024-10-20 18:01:58,803][00556] Num frames 9000... +[2024-10-20 18:01:58,929][00556] Num frames 9100... +[2024-10-20 18:01:59,054][00556] Num frames 9200... +[2024-10-20 18:01:59,182][00556] Num frames 9300... +[2024-10-20 18:01:59,306][00556] Num frames 9400... +[2024-10-20 18:01:59,437][00556] Num frames 9500... +[2024-10-20 18:01:59,559][00556] Num frames 9600... +[2024-10-20 18:01:59,692][00556] Num frames 9700... +[2024-10-20 18:01:59,814][00556] Num frames 9800... +[2024-10-20 18:01:59,942][00556] Num frames 9900... +[2024-10-20 18:02:00,071][00556] Num frames 10000... +[2024-10-20 18:02:00,196][00556] Num frames 10100... +[2024-10-20 18:02:00,337][00556] Avg episode rewards: #0: 21.469, true rewards: #0: 10.169 +[2024-10-20 18:02:00,339][00556] Avg episode reward: 21.469, avg true_objective: 10.169 +[2024-10-20 18:03:02,211][00556] Replay video saved to /content/train_dir/default_experiment/replay.mp4!