diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1223 @@ +[2024-08-20 19:37:42,274][01505] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-08-20 19:37:42,283][01505] Rollout worker 0 uses device cpu +[2024-08-20 19:37:42,285][01505] Rollout worker 1 uses device cpu +[2024-08-20 19:37:42,288][01505] Rollout worker 2 uses device cpu +[2024-08-20 19:37:42,290][01505] Rollout worker 3 uses device cpu +[2024-08-20 19:37:42,294][01505] Rollout worker 4 uses device cpu +[2024-08-20 19:37:42,299][01505] Rollout worker 5 uses device cpu +[2024-08-20 19:37:42,301][01505] Rollout worker 6 uses device cpu +[2024-08-20 19:37:42,302][01505] Rollout worker 7 uses device cpu +[2024-08-20 19:37:42,555][01505] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-08-20 19:37:42,562][01505] InferenceWorker_p0-w0: min num requests: 2 +[2024-08-20 19:37:42,638][01505] Starting all processes... +[2024-08-20 19:37:42,647][01505] Starting process learner_proc0 +[2024-08-20 19:37:45,330][01505] Starting all processes... +[2024-08-20 19:37:45,340][01505] Starting process inference_proc0-0 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc0 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc1 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc2 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc3 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc4 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc5 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc6 +[2024-08-20 19:37:45,341][01505] Starting process rollout_proc7 +[2024-08-20 19:38:05,042][03648] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-08-20 19:38:05,042][03648] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-08-20 19:38:05,166][03667] Worker 5 uses CPU cores [1] +[2024-08-20 19:38:05,191][03648] Num visible devices: 1 +[2024-08-20 19:38:05,226][03648] Starting seed is not provided +[2024-08-20 19:38:05,227][03648] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-08-20 19:38:05,228][03648] Initializing actor-critic model on device cuda:0 +[2024-08-20 19:38:05,229][03648] RunningMeanStd input shape: (3, 72, 128) +[2024-08-20 19:38:05,232][03648] RunningMeanStd input shape: (1,) +[2024-08-20 19:38:05,232][01505] Heartbeat connected on Batcher_0 +[2024-08-20 19:38:05,297][03662] Worker 0 uses CPU cores [0] +[2024-08-20 19:38:05,347][03648] ConvEncoder: input_channels=3 +[2024-08-20 19:38:05,381][01505] Heartbeat connected on RolloutWorker_w5 +[2024-08-20 19:38:05,382][03664] Worker 2 uses CPU cores [0] +[2024-08-20 19:38:05,475][01505] Heartbeat connected on RolloutWorker_w0 +[2024-08-20 19:38:05,546][01505] Heartbeat connected on RolloutWorker_w2 +[2024-08-20 19:38:05,578][03668] Worker 6 uses CPU cores [0] +[2024-08-20 19:38:05,634][03661] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-08-20 19:38:05,635][03661] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-08-20 19:38:05,657][03661] Num visible devices: 1 +[2024-08-20 19:38:05,656][03663] Worker 1 uses CPU cores [1] +[2024-08-20 19:38:05,675][01505] Heartbeat connected on InferenceWorker_p0-w0 +[2024-08-20 19:38:05,691][01505] Heartbeat connected on RolloutWorker_w1 +[2024-08-20 19:38:05,741][03669] Worker 7 uses CPU cores [1] +[2024-08-20 19:38:05,747][01505] Heartbeat connected on RolloutWorker_w6 +[2024-08-20 19:38:05,818][03665] Worker 3 uses CPU cores [1] +[2024-08-20 19:38:05,832][01505] Heartbeat connected on RolloutWorker_w7 +[2024-08-20 19:38:05,856][03666] Worker 4 uses CPU cores [0] +[2024-08-20 19:38:05,890][01505] Heartbeat connected on RolloutWorker_w3 +[2024-08-20 19:38:05,915][01505] Heartbeat connected on RolloutWorker_w4 +[2024-08-20 19:38:06,040][03648] Conv encoder output size: 512 +[2024-08-20 19:38:06,041][03648] Policy head output size: 512 +[2024-08-20 19:38:06,121][03648] Created Actor Critic model with architecture: +[2024-08-20 19:38:06,121][03648] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-08-20 19:38:06,400][03648] Using optimizer +[2024-08-20 19:38:07,215][03648] No checkpoints found +[2024-08-20 19:38:07,216][03648] Did not load from checkpoint, starting from scratch! +[2024-08-20 19:38:07,216][03648] Initialized policy 0 weights for model version 0 +[2024-08-20 19:38:07,219][03648] LearnerWorker_p0 finished initialization! +[2024-08-20 19:38:07,220][03648] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-08-20 19:38:07,219][01505] Heartbeat connected on LearnerWorker_p0 +[2024-08-20 19:38:07,370][03661] RunningMeanStd input shape: (3, 72, 128) +[2024-08-20 19:38:07,372][03661] RunningMeanStd input shape: (1,) +[2024-08-20 19:38:07,386][03661] ConvEncoder: input_channels=3 +[2024-08-20 19:38:07,502][03661] Conv encoder output size: 512 +[2024-08-20 19:38:07,502][03661] Policy head output size: 512 +[2024-08-20 19:38:07,567][01505] Inference worker 0-0 is ready! +[2024-08-20 19:38:07,569][01505] All inference workers are ready! Signal rollout workers to start! +[2024-08-20 19:38:07,833][03664] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,856][03665] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,872][03669] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,896][03663] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,893][03662] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,914][03666] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,922][03668] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:07,931][03667] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:38:09,083][03663] Decorrelating experience for 0 frames... +[2024-08-20 19:38:09,085][03665] Decorrelating experience for 0 frames... +[2024-08-20 19:38:09,083][03664] Decorrelating experience for 0 frames... +[2024-08-20 19:38:09,086][03662] Decorrelating experience for 0 frames... +[2024-08-20 19:38:09,522][03663] Decorrelating experience for 32 frames... +[2024-08-20 19:38:10,202][01505] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-08-20 19:38:10,212][03668] Decorrelating experience for 0 frames... +[2024-08-20 19:38:10,223][03662] Decorrelating experience for 32 frames... +[2024-08-20 19:38:10,244][03666] Decorrelating experience for 0 frames... +[2024-08-20 19:38:10,703][03664] Decorrelating experience for 32 frames... +[2024-08-20 19:38:11,257][03666] Decorrelating experience for 32 frames... +[2024-08-20 19:38:11,510][03665] Decorrelating experience for 32 frames... +[2024-08-20 19:38:11,628][03669] Decorrelating experience for 0 frames... +[2024-08-20 19:38:11,734][03667] Decorrelating experience for 0 frames... +[2024-08-20 19:38:12,125][03664] Decorrelating experience for 64 frames... +[2024-08-20 19:38:12,750][03662] Decorrelating experience for 64 frames... +[2024-08-20 19:38:13,181][03666] Decorrelating experience for 64 frames... +[2024-08-20 19:38:13,743][03664] Decorrelating experience for 96 frames... +[2024-08-20 19:38:14,172][03663] Decorrelating experience for 64 frames... +[2024-08-20 19:38:14,429][03667] Decorrelating experience for 32 frames... +[2024-08-20 19:38:14,680][03662] Decorrelating experience for 96 frames... +[2024-08-20 19:38:14,970][03665] Decorrelating experience for 64 frames... +[2024-08-20 19:38:15,206][01505] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-08-20 19:38:15,543][03669] Decorrelating experience for 32 frames... +[2024-08-20 19:38:16,863][03669] Decorrelating experience for 64 frames... +[2024-08-20 19:38:17,541][03668] Decorrelating experience for 32 frames... +[2024-08-20 19:38:18,235][03666] Decorrelating experience for 96 frames... +[2024-08-20 19:38:18,947][03669] Decorrelating experience for 96 frames... +[2024-08-20 19:38:20,085][03667] Decorrelating experience for 64 frames... +[2024-08-20 19:38:20,202][01505] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 125.8. Samples: 1258. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-08-20 19:38:20,204][01505] Avg episode reward: [(0, '3.377')] +[2024-08-20 19:38:21,198][03665] Decorrelating experience for 96 frames... +[2024-08-20 19:38:22,850][03668] Decorrelating experience for 64 frames... +[2024-08-20 19:38:23,611][03648] Signal inference workers to stop experience collection... +[2024-08-20 19:38:23,623][03661] InferenceWorker_p0-w0: stopping experience collection +[2024-08-20 19:38:23,788][03667] Decorrelating experience for 96 frames... +[2024-08-20 19:38:24,168][03668] Decorrelating experience for 96 frames... +[2024-08-20 19:38:24,327][03663] Decorrelating experience for 96 frames... +[2024-08-20 19:38:25,202][01505] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 168.1. Samples: 2522. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-08-20 19:38:25,204][01505] Avg episode reward: [(0, '3.351')] +[2024-08-20 19:38:25,655][03648] Signal inference workers to resume experience collection... +[2024-08-20 19:38:25,657][03661] InferenceWorker_p0-w0: resuming experience collection +[2024-08-20 19:38:30,202][01505] Fps is (10 sec: 2048.0, 60 sec: 1024.0, 300 sec: 1024.0). Total num frames: 20480. Throughput: 0: 246.5. Samples: 4930. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2024-08-20 19:38:30,207][01505] Avg episode reward: [(0, '3.551')] +[2024-08-20 19:38:35,207][01505] Fps is (10 sec: 3275.2, 60 sec: 1310.5, 300 sec: 1310.5). Total num frames: 32768. Throughput: 0: 355.6. Samples: 8892. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:38:35,210][01505] Avg episode reward: [(0, '3.816')] +[2024-08-20 19:38:38,037][03661] Updated weights for policy 0, policy_version 10 (0.0522) +[2024-08-20 19:38:40,202][01505] Fps is (10 sec: 2867.2, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 49152. Throughput: 0: 350.8. Samples: 10524. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:38:40,205][01505] Avg episode reward: [(0, '4.036')] +[2024-08-20 19:38:45,208][01505] Fps is (10 sec: 3686.2, 60 sec: 1989.2, 300 sec: 1989.2). Total num frames: 69632. Throughput: 0: 471.1. Samples: 16490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:38:45,211][01505] Avg episode reward: [(0, '4.400')] +[2024-08-20 19:38:48,106][03661] Updated weights for policy 0, policy_version 20 (0.0042) +[2024-08-20 19:38:50,203][01505] Fps is (10 sec: 3686.1, 60 sec: 2150.4, 300 sec: 2150.4). Total num frames: 86016. Throughput: 0: 553.1. Samples: 22126. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:38:50,208][01505] Avg episode reward: [(0, '4.330')] +[2024-08-20 19:38:55,207][01505] Fps is (10 sec: 2867.4, 60 sec: 2184.3, 300 sec: 2184.3). Total num frames: 98304. Throughput: 0: 533.5. Samples: 24010. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:38:55,210][01505] Avg episode reward: [(0, '4.295')] +[2024-08-20 19:39:00,202][01505] Fps is (10 sec: 3277.0, 60 sec: 2375.7, 300 sec: 2375.7). Total num frames: 118784. Throughput: 0: 642.1. Samples: 28890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:39:00,207][01505] Avg episode reward: [(0, '4.280')] +[2024-08-20 19:39:00,210][03648] Saving new best policy, reward=4.280! +[2024-08-20 19:39:00,998][03661] Updated weights for policy 0, policy_version 30 (0.0023) +[2024-08-20 19:39:05,202][01505] Fps is (10 sec: 4098.0, 60 sec: 2532.1, 300 sec: 2532.1). Total num frames: 139264. Throughput: 0: 750.0. Samples: 35008. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:39:05,207][01505] Avg episode reward: [(0, '4.472')] +[2024-08-20 19:39:05,219][03648] Saving new best policy, reward=4.472! +[2024-08-20 19:39:10,202][01505] Fps is (10 sec: 3276.8, 60 sec: 2525.9, 300 sec: 2525.9). Total num frames: 151552. Throughput: 0: 775.5. Samples: 37418. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:39:10,209][01505] Avg episode reward: [(0, '4.469')] +[2024-08-20 19:39:13,960][03661] Updated weights for policy 0, policy_version 40 (0.0038) +[2024-08-20 19:39:15,202][01505] Fps is (10 sec: 2867.2, 60 sec: 2799.1, 300 sec: 2583.6). Total num frames: 167936. Throughput: 0: 805.4. Samples: 41174. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:39:15,207][01505] Avg episode reward: [(0, '4.439')] +[2024-08-20 19:39:20,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3140.3, 300 sec: 2691.7). Total num frames: 188416. Throughput: 0: 847.6. Samples: 47030. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:39:20,207][01505] Avg episode reward: [(0, '4.409')] +[2024-08-20 19:39:23,899][03661] Updated weights for policy 0, policy_version 50 (0.0034) +[2024-08-20 19:39:25,204][01505] Fps is (10 sec: 4095.2, 60 sec: 3481.5, 300 sec: 2785.2). Total num frames: 208896. Throughput: 0: 881.2. Samples: 50180. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:39:25,210][01505] Avg episode reward: [(0, '4.416')] +[2024-08-20 19:39:30,204][01505] Fps is (10 sec: 3276.2, 60 sec: 3345.0, 300 sec: 2764.7). Total num frames: 221184. Throughput: 0: 855.1. Samples: 54966. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:39:30,209][01505] Avg episode reward: [(0, '4.431')] +[2024-08-20 19:39:35,202][01505] Fps is (10 sec: 2458.1, 60 sec: 3345.3, 300 sec: 2746.7). Total num frames: 233472. Throughput: 0: 826.1. Samples: 59300. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:39:35,204][01505] Avg episode reward: [(0, '4.550')] +[2024-08-20 19:39:35,226][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000057_233472.pth... +[2024-08-20 19:39:35,366][03648] Saving new best policy, reward=4.550! +[2024-08-20 19:39:37,262][03661] Updated weights for policy 0, policy_version 60 (0.0026) +[2024-08-20 19:39:40,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3413.3, 300 sec: 2821.7). Total num frames: 253952. Throughput: 0: 844.1. Samples: 61990. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-08-20 19:39:40,207][01505] Avg episode reward: [(0, '4.466')] +[2024-08-20 19:39:45,203][01505] Fps is (10 sec: 3686.1, 60 sec: 3345.3, 300 sec: 2845.6). Total num frames: 270336. Throughput: 0: 863.4. Samples: 67746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:39:45,207][01505] Avg episode reward: [(0, '4.717')] +[2024-08-20 19:39:45,228][03648] Saving new best policy, reward=4.717! +[2024-08-20 19:39:50,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 2826.2). Total num frames: 282624. Throughput: 0: 807.2. Samples: 71334. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:39:50,205][01505] Avg episode reward: [(0, '4.572')] +[2024-08-20 19:39:50,816][03661] Updated weights for policy 0, policy_version 70 (0.0028) +[2024-08-20 19:39:55,203][01505] Fps is (10 sec: 3277.1, 60 sec: 3413.6, 300 sec: 2886.7). Total num frames: 303104. Throughput: 0: 813.0. Samples: 74002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:39:55,209][01505] Avg episode reward: [(0, '4.543')] +[2024-08-20 19:40:00,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 2941.7). Total num frames: 323584. Throughput: 0: 870.5. Samples: 80346. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-08-20 19:40:00,210][01505] Avg episode reward: [(0, '4.423')] +[2024-08-20 19:40:00,611][03661] Updated weights for policy 0, policy_version 80 (0.0024) +[2024-08-20 19:40:05,203][01505] Fps is (10 sec: 3276.7, 60 sec: 3276.8, 300 sec: 2920.6). Total num frames: 335872. Throughput: 0: 846.8. Samples: 85138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:40:05,208][01505] Avg episode reward: [(0, '4.279')] +[2024-08-20 19:40:10,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 2935.5). Total num frames: 352256. Throughput: 0: 818.6. Samples: 87014. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:40:10,208][01505] Avg episode reward: [(0, '4.412')] +[2024-08-20 19:40:13,918][03661] Updated weights for policy 0, policy_version 90 (0.0043) +[2024-08-20 19:40:15,202][01505] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 2981.9). Total num frames: 372736. Throughput: 0: 835.5. Samples: 92564. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:40:15,208][01505] Avg episode reward: [(0, '4.704')] +[2024-08-20 19:40:20,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3024.7). Total num frames: 393216. Throughput: 0: 876.7. Samples: 98750. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:40:20,207][01505] Avg episode reward: [(0, '4.810')] +[2024-08-20 19:40:20,212][03648] Saving new best policy, reward=4.810! +[2024-08-20 19:40:25,207][01505] Fps is (10 sec: 3275.2, 60 sec: 3276.6, 300 sec: 3003.6). Total num frames: 405504. Throughput: 0: 857.1. Samples: 100562. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:40:25,210][01505] Avg episode reward: [(0, '4.787')] +[2024-08-20 19:40:26,522][03661] Updated weights for policy 0, policy_version 100 (0.0031) +[2024-08-20 19:40:30,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.2, 300 sec: 3013.5). Total num frames: 421888. Throughput: 0: 822.8. Samples: 104772. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-08-20 19:40:30,205][01505] Avg episode reward: [(0, '4.733')] +[2024-08-20 19:40:35,203][01505] Fps is (10 sec: 3688.1, 60 sec: 3481.6, 300 sec: 3050.8). Total num frames: 442368. Throughput: 0: 884.3. Samples: 111128. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:40:35,206][01505] Avg episode reward: [(0, '4.819')] +[2024-08-20 19:40:35,217][03648] Saving new best policy, reward=4.819! +[2024-08-20 19:40:36,865][03661] Updated weights for policy 0, policy_version 110 (0.0045) +[2024-08-20 19:40:40,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3058.3). Total num frames: 458752. Throughput: 0: 893.8. Samples: 114222. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:40:40,212][01505] Avg episode reward: [(0, '4.872')] +[2024-08-20 19:40:40,217][03648] Saving new best policy, reward=4.872! +[2024-08-20 19:40:45,210][01505] Fps is (10 sec: 2865.0, 60 sec: 3344.7, 300 sec: 3038.8). Total num frames: 471040. Throughput: 0: 837.7. Samples: 118048. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:40:45,215][01505] Avg episode reward: [(0, '4.760')] +[2024-08-20 19:40:49,688][03661] Updated weights for policy 0, policy_version 120 (0.0027) +[2024-08-20 19:40:50,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3072.0). Total num frames: 491520. Throughput: 0: 852.3. Samples: 123490. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-08-20 19:40:50,204][01505] Avg episode reward: [(0, '4.821')] +[2024-08-20 19:40:55,202][01505] Fps is (10 sec: 4099.3, 60 sec: 3481.6, 300 sec: 3103.0). Total num frames: 512000. Throughput: 0: 880.8. Samples: 126648. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-08-20 19:40:55,208][01505] Avg episode reward: [(0, '4.510')] +[2024-08-20 19:41:00,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3108.1). Total num frames: 528384. Throughput: 0: 874.9. Samples: 131934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:41:00,206][01505] Avg episode reward: [(0, '4.540')] +[2024-08-20 19:41:01,309][03661] Updated weights for policy 0, policy_version 130 (0.0022) +[2024-08-20 19:41:05,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3089.6). Total num frames: 540672. Throughput: 0: 825.2. Samples: 135882. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:41:05,205][01505] Avg episode reward: [(0, '4.554')] +[2024-08-20 19:41:10,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3140.3). Total num frames: 565248. Throughput: 0: 856.0. Samples: 139080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:41:10,208][01505] Avg episode reward: [(0, '4.646')] +[2024-08-20 19:41:12,237][03661] Updated weights for policy 0, policy_version 140 (0.0047) +[2024-08-20 19:41:15,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3144.0). Total num frames: 581632. Throughput: 0: 899.7. Samples: 145260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:41:15,206][01505] Avg episode reward: [(0, '4.687')] +[2024-08-20 19:41:20,204][01505] Fps is (10 sec: 2866.7, 60 sec: 3345.0, 300 sec: 3125.9). Total num frames: 593920. Throughput: 0: 848.1. Samples: 149292. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:41:20,207][01505] Avg episode reward: [(0, '4.642')] +[2024-08-20 19:41:25,179][03661] Updated weights for policy 0, policy_version 150 (0.0034) +[2024-08-20 19:41:25,204][01505] Fps is (10 sec: 3276.3, 60 sec: 3481.8, 300 sec: 3150.7). Total num frames: 614400. Throughput: 0: 826.2. Samples: 151402. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:41:25,211][01505] Avg episode reward: [(0, '4.575')] +[2024-08-20 19:41:30,202][01505] Fps is (10 sec: 4096.8, 60 sec: 3549.9, 300 sec: 3174.4). Total num frames: 634880. Throughput: 0: 883.8. Samples: 157812. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:41:30,209][01505] Avg episode reward: [(0, '4.831')] +[2024-08-20 19:41:35,202][01505] Fps is (10 sec: 3686.9, 60 sec: 3481.6, 300 sec: 3176.9). Total num frames: 651264. Throughput: 0: 882.2. Samples: 163190. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:41:35,211][01505] Avg episode reward: [(0, '4.915')] +[2024-08-20 19:41:35,227][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000159_651264.pth... +[2024-08-20 19:41:35,422][03648] Saving new best policy, reward=4.915! +[2024-08-20 19:41:37,042][03661] Updated weights for policy 0, policy_version 160 (0.0042) +[2024-08-20 19:41:40,205][01505] Fps is (10 sec: 2866.4, 60 sec: 3413.2, 300 sec: 3159.7). Total num frames: 663552. Throughput: 0: 850.1. Samples: 164906. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:41:40,208][01505] Avg episode reward: [(0, '5.055')] +[2024-08-20 19:41:40,219][03648] Saving new best policy, reward=5.055! +[2024-08-20 19:41:45,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3550.3, 300 sec: 3181.5). Total num frames: 684032. Throughput: 0: 843.2. Samples: 169878. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:41:45,206][01505] Avg episode reward: [(0, '4.814')] +[2024-08-20 19:41:48,189][03661] Updated weights for policy 0, policy_version 170 (0.0043) +[2024-08-20 19:41:50,202][01505] Fps is (10 sec: 4097.2, 60 sec: 3549.9, 300 sec: 3202.3). Total num frames: 704512. Throughput: 0: 898.3. Samples: 176306. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:41:50,209][01505] Avg episode reward: [(0, '4.648')] +[2024-08-20 19:41:55,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3185.8). Total num frames: 716800. Throughput: 0: 876.3. Samples: 178512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:41:55,208][01505] Avg episode reward: [(0, '4.625')] +[2024-08-20 19:42:00,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3187.8). Total num frames: 733184. Throughput: 0: 826.8. Samples: 182468. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:42:00,212][01505] Avg episode reward: [(0, '4.973')] +[2024-08-20 19:42:00,915][03661] Updated weights for policy 0, policy_version 180 (0.0035) +[2024-08-20 19:42:05,203][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3207.1). Total num frames: 753664. Throughput: 0: 875.7. Samples: 188698. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:42:05,205][01505] Avg episode reward: [(0, '5.324')] +[2024-08-20 19:42:05,219][03648] Saving new best policy, reward=5.324! +[2024-08-20 19:42:10,204][01505] Fps is (10 sec: 3685.7, 60 sec: 3413.2, 300 sec: 3208.5). Total num frames: 770048. Throughput: 0: 895.4. Samples: 191696. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:42:10,210][01505] Avg episode reward: [(0, '5.402')] +[2024-08-20 19:42:10,216][03648] Saving new best policy, reward=5.402! +[2024-08-20 19:42:12,353][03661] Updated weights for policy 0, policy_version 190 (0.0033) +[2024-08-20 19:42:15,205][01505] Fps is (10 sec: 2866.4, 60 sec: 3344.9, 300 sec: 3193.2). Total num frames: 782336. Throughput: 0: 842.7. Samples: 195734. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-08-20 19:42:15,212][01505] Avg episode reward: [(0, '5.598')] +[2024-08-20 19:42:15,223][03648] Saving new best policy, reward=5.598! +[2024-08-20 19:42:20,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3481.7, 300 sec: 3211.3). Total num frames: 802816. Throughput: 0: 834.7. Samples: 200750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:42:20,205][01505] Avg episode reward: [(0, '5.637')] +[2024-08-20 19:42:20,209][03648] Saving new best policy, reward=5.637! +[2024-08-20 19:42:24,571][03661] Updated weights for policy 0, policy_version 200 (0.0021) +[2024-08-20 19:42:25,202][01505] Fps is (10 sec: 3687.5, 60 sec: 3413.4, 300 sec: 3212.5). Total num frames: 819200. Throughput: 0: 853.8. Samples: 203324. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:42:25,205][01505] Avg episode reward: [(0, '5.974')] +[2024-08-20 19:42:25,214][03648] Saving new best policy, reward=5.974! +[2024-08-20 19:42:30,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3213.8). Total num frames: 835584. Throughput: 0: 862.9. Samples: 208710. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:42:30,207][01505] Avg episode reward: [(0, '5.894')] +[2024-08-20 19:42:35,203][01505] Fps is (10 sec: 2867.1, 60 sec: 3276.8, 300 sec: 3199.5). Total num frames: 847872. Throughput: 0: 807.2. Samples: 212632. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:42:35,205][01505] Avg episode reward: [(0, '5.936')] +[2024-08-20 19:42:37,476][03661] Updated weights for policy 0, policy_version 210 (0.0053) +[2024-08-20 19:42:40,206][01505] Fps is (10 sec: 3275.5, 60 sec: 3413.3, 300 sec: 3216.1). Total num frames: 868352. Throughput: 0: 826.4. Samples: 215704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:42:40,214][01505] Avg episode reward: [(0, '5.869')] +[2024-08-20 19:42:45,202][01505] Fps is (10 sec: 4096.1, 60 sec: 3413.3, 300 sec: 3232.1). Total num frames: 888832. Throughput: 0: 877.2. Samples: 221944. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:42:45,209][01505] Avg episode reward: [(0, '5.481')] +[2024-08-20 19:42:48,846][03661] Updated weights for policy 0, policy_version 220 (0.0043) +[2024-08-20 19:42:50,202][01505] Fps is (10 sec: 3278.1, 60 sec: 3276.8, 300 sec: 3218.3). Total num frames: 901120. Throughput: 0: 836.0. Samples: 226318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:42:50,209][01505] Avg episode reward: [(0, '5.615')] +[2024-08-20 19:42:55,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3219.3). Total num frames: 917504. Throughput: 0: 812.9. Samples: 228276. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:42:55,205][01505] Avg episode reward: [(0, '5.707')] +[2024-08-20 19:43:00,153][03661] Updated weights for policy 0, policy_version 230 (0.0032) +[2024-08-20 19:43:00,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3248.6). Total num frames: 942080. Throughput: 0: 860.3. Samples: 234446. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:43:00,208][01505] Avg episode reward: [(0, '6.086')] +[2024-08-20 19:43:00,211][03648] Saving new best policy, reward=6.086! +[2024-08-20 19:43:05,205][01505] Fps is (10 sec: 4094.9, 60 sec: 3413.2, 300 sec: 3249.0). Total num frames: 958464. Throughput: 0: 873.9. Samples: 240080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:43:05,210][01505] Avg episode reward: [(0, '6.213')] +[2024-08-20 19:43:05,222][03648] Saving new best policy, reward=6.213! +[2024-08-20 19:43:10,206][01505] Fps is (10 sec: 2866.0, 60 sec: 3345.0, 300 sec: 3290.7). Total num frames: 970752. Throughput: 0: 857.1. Samples: 241896. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:43:10,208][01505] Avg episode reward: [(0, '6.060')] +[2024-08-20 19:43:12,979][03661] Updated weights for policy 0, policy_version 240 (0.0034) +[2024-08-20 19:43:15,202][01505] Fps is (10 sec: 3277.7, 60 sec: 3481.8, 300 sec: 3360.1). Total num frames: 991232. Throughput: 0: 844.2. Samples: 246700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:43:15,207][01505] Avg episode reward: [(0, '5.925')] +[2024-08-20 19:43:20,202][01505] Fps is (10 sec: 4097.6, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1011712. Throughput: 0: 898.2. Samples: 253050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:43:20,207][01505] Avg episode reward: [(0, '6.018')] +[2024-08-20 19:43:23,810][03661] Updated weights for policy 0, policy_version 250 (0.0016) +[2024-08-20 19:43:25,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 1024000. Throughput: 0: 888.0. Samples: 255660. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:43:25,207][01505] Avg episode reward: [(0, '6.052')] +[2024-08-20 19:43:30,203][01505] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3415.7). Total num frames: 1040384. Throughput: 0: 833.5. Samples: 259454. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-08-20 19:43:30,210][01505] Avg episode reward: [(0, '6.703')] +[2024-08-20 19:43:30,214][03648] Saving new best policy, reward=6.703! +[2024-08-20 19:43:35,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 1060864. Throughput: 0: 867.1. Samples: 265338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:43:35,209][01505] Avg episode reward: [(0, '6.718')] +[2024-08-20 19:43:35,228][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000259_1060864.pth... +[2024-08-20 19:43:35,395][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000057_233472.pth +[2024-08-20 19:43:35,419][03648] Saving new best policy, reward=6.718! +[2024-08-20 19:43:35,940][03661] Updated weights for policy 0, policy_version 260 (0.0030) +[2024-08-20 19:43:40,202][01505] Fps is (10 sec: 3686.5, 60 sec: 3481.8, 300 sec: 3415.7). Total num frames: 1077248. Throughput: 0: 889.9. Samples: 268322. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:43:40,205][01505] Avg episode reward: [(0, '7.742')] +[2024-08-20 19:43:40,227][03648] Saving new best policy, reward=7.742! +[2024-08-20 19:43:45,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3401.8). Total num frames: 1089536. Throughput: 0: 853.2. Samples: 272840. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:43:45,209][01505] Avg episode reward: [(0, '7.456')] +[2024-08-20 19:43:49,307][03661] Updated weights for policy 0, policy_version 270 (0.0040) +[2024-08-20 19:43:50,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3415.7). Total num frames: 1105920. Throughput: 0: 826.9. Samples: 277288. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:43:50,210][01505] Avg episode reward: [(0, '7.528')] +[2024-08-20 19:43:55,204][01505] Fps is (10 sec: 4095.3, 60 sec: 3549.8, 300 sec: 3429.5). Total num frames: 1130496. Throughput: 0: 858.1. Samples: 280508. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:43:55,208][01505] Avg episode reward: [(0, '7.633')] +[2024-08-20 19:43:59,694][03661] Updated weights for policy 0, policy_version 280 (0.0022) +[2024-08-20 19:44:00,204][01505] Fps is (10 sec: 4095.4, 60 sec: 3413.2, 300 sec: 3415.6). Total num frames: 1146880. Throughput: 0: 888.5. Samples: 286686. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:44:00,207][01505] Avg episode reward: [(0, '8.145')] +[2024-08-20 19:44:00,211][03648] Saving new best policy, reward=8.145! +[2024-08-20 19:44:05,202][01505] Fps is (10 sec: 2867.7, 60 sec: 3345.2, 300 sec: 3415.6). Total num frames: 1159168. Throughput: 0: 827.8. Samples: 290302. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:05,207][01505] Avg episode reward: [(0, '8.100')] +[2024-08-20 19:44:10,202][01505] Fps is (10 sec: 2867.6, 60 sec: 3413.6, 300 sec: 3415.6). Total num frames: 1175552. Throughput: 0: 824.8. Samples: 292774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:44:10,204][01505] Avg episode reward: [(0, '8.307')] +[2024-08-20 19:44:10,214][03648] Saving new best policy, reward=8.307! +[2024-08-20 19:44:12,134][03661] Updated weights for policy 0, policy_version 290 (0.0031) +[2024-08-20 19:44:15,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1200128. Throughput: 0: 880.6. Samples: 299082. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:44:15,205][01505] Avg episode reward: [(0, '8.502')] +[2024-08-20 19:44:15,215][03648] Saving new best policy, reward=8.502! +[2024-08-20 19:44:20,204][01505] Fps is (10 sec: 3685.8, 60 sec: 3345.0, 300 sec: 3401.8). Total num frames: 1212416. Throughput: 0: 854.6. Samples: 303798. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:44:20,210][01505] Avg episode reward: [(0, '8.595')] +[2024-08-20 19:44:20,212][03648] Saving new best policy, reward=8.595! +[2024-08-20 19:44:25,203][01505] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3401.8). Total num frames: 1224704. Throughput: 0: 829.8. Samples: 305664. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:25,205][01505] Avg episode reward: [(0, '8.529')] +[2024-08-20 19:44:25,488][03661] Updated weights for policy 0, policy_version 300 (0.0028) +[2024-08-20 19:44:30,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1245184. Throughput: 0: 852.8. Samples: 311216. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:30,205][01505] Avg episode reward: [(0, '8.968')] +[2024-08-20 19:44:30,295][03648] Saving new best policy, reward=8.968! +[2024-08-20 19:44:35,202][01505] Fps is (10 sec: 4096.1, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1265664. Throughput: 0: 893.6. Samples: 317500. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:35,205][01505] Avg episode reward: [(0, '9.271')] +[2024-08-20 19:44:35,215][03648] Saving new best policy, reward=9.271! +[2024-08-20 19:44:35,696][03661] Updated weights for policy 0, policy_version 310 (0.0020) +[2024-08-20 19:44:40,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3415.7). Total num frames: 1277952. Throughput: 0: 863.2. Samples: 319350. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:40,205][01505] Avg episode reward: [(0, '9.244')] +[2024-08-20 19:44:45,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1294336. Throughput: 0: 818.2. Samples: 323506. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:45,207][01505] Avg episode reward: [(0, '9.183')] +[2024-08-20 19:44:48,236][03661] Updated weights for policy 0, policy_version 320 (0.0038) +[2024-08-20 19:44:50,203][01505] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1314816. Throughput: 0: 875.9. Samples: 329716. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:44:50,206][01505] Avg episode reward: [(0, '9.817')] +[2024-08-20 19:44:50,274][03648] Saving new best policy, reward=9.817! +[2024-08-20 19:44:55,204][01505] Fps is (10 sec: 4095.3, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1335296. Throughput: 0: 891.0. Samples: 332870. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:44:55,207][01505] Avg episode reward: [(0, '9.101')] +[2024-08-20 19:45:00,202][01505] Fps is (10 sec: 3276.9, 60 sec: 3345.2, 300 sec: 3429.5). Total num frames: 1347584. Throughput: 0: 840.8. Samples: 336920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:45:00,205][01505] Avg episode reward: [(0, '9.470')] +[2024-08-20 19:45:00,955][03661] Updated weights for policy 0, policy_version 330 (0.0053) +[2024-08-20 19:45:05,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1368064. Throughput: 0: 853.8. Samples: 342218. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:45:05,210][01505] Avg episode reward: [(0, '8.632')] +[2024-08-20 19:45:10,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1388544. Throughput: 0: 881.7. Samples: 345342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:45:10,206][01505] Avg episode reward: [(0, '9.334')] +[2024-08-20 19:45:10,719][03661] Updated weights for policy 0, policy_version 340 (0.0029) +[2024-08-20 19:45:15,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 1400832. Throughput: 0: 876.4. Samples: 350656. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:45:15,207][01505] Avg episode reward: [(0, '9.435')] +[2024-08-20 19:45:20,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3429.6). Total num frames: 1417216. Throughput: 0: 827.6. Samples: 354744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:45:20,210][01505] Avg episode reward: [(0, '9.801')] +[2024-08-20 19:45:23,387][03661] Updated weights for policy 0, policy_version 350 (0.0026) +[2024-08-20 19:45:25,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1437696. Throughput: 0: 857.4. Samples: 357932. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:45:25,204][01505] Avg episode reward: [(0, '10.144')] +[2024-08-20 19:45:25,281][03648] Saving new best policy, reward=10.144! +[2024-08-20 19:45:30,204][01505] Fps is (10 sec: 4095.2, 60 sec: 3549.8, 300 sec: 3443.4). Total num frames: 1458176. Throughput: 0: 908.2. Samples: 364378. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:45:30,209][01505] Avg episode reward: [(0, '10.319')] +[2024-08-20 19:45:30,214][03648] Saving new best policy, reward=10.319! +[2024-08-20 19:45:35,206][01505] Fps is (10 sec: 3275.5, 60 sec: 3413.1, 300 sec: 3429.5). Total num frames: 1470464. Throughput: 0: 860.5. Samples: 368440. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:45:35,215][01505] Avg episode reward: [(0, '10.268')] +[2024-08-20 19:45:35,236][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000359_1470464.pth... +[2024-08-20 19:45:35,427][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000159_651264.pth +[2024-08-20 19:45:35,758][03661] Updated weights for policy 0, policy_version 360 (0.0045) +[2024-08-20 19:45:40,202][01505] Fps is (10 sec: 2867.7, 60 sec: 3481.6, 300 sec: 3443.5). Total num frames: 1486848. Throughput: 0: 834.3. Samples: 370414. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:45:40,208][01505] Avg episode reward: [(0, '10.165')] +[2024-08-20 19:45:45,202][01505] Fps is (10 sec: 4097.6, 60 sec: 3618.1, 300 sec: 3457.3). Total num frames: 1511424. Throughput: 0: 884.6. Samples: 376728. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:45:45,206][01505] Avg episode reward: [(0, '11.065')] +[2024-08-20 19:45:45,219][03648] Saving new best policy, reward=11.065! +[2024-08-20 19:45:46,397][03661] Updated weights for policy 0, policy_version 370 (0.0039) +[2024-08-20 19:45:50,204][01505] Fps is (10 sec: 3685.7, 60 sec: 3481.5, 300 sec: 3429.5). Total num frames: 1523712. Throughput: 0: 883.3. Samples: 381970. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:45:50,207][01505] Avg episode reward: [(0, '10.847')] +[2024-08-20 19:45:55,205][01505] Fps is (10 sec: 2457.1, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 1536000. Throughput: 0: 855.8. Samples: 383856. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:45:55,208][01505] Avg episode reward: [(0, '11.357')] +[2024-08-20 19:45:55,242][03648] Saving new best policy, reward=11.357! +[2024-08-20 19:45:59,378][03661] Updated weights for policy 0, policy_version 380 (0.0052) +[2024-08-20 19:46:00,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1556480. Throughput: 0: 850.4. Samples: 388922. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:46:00,208][01505] Avg episode reward: [(0, '11.339')] +[2024-08-20 19:46:05,202][01505] Fps is (10 sec: 4096.9, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1576960. Throughput: 0: 897.6. Samples: 395136. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:46:05,209][01505] Avg episode reward: [(0, '11.093')] +[2024-08-20 19:46:10,203][01505] Fps is (10 sec: 3686.3, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1593344. Throughput: 0: 881.2. Samples: 397586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:46:10,205][01505] Avg episode reward: [(0, '11.436')] +[2024-08-20 19:46:10,214][03648] Saving new best policy, reward=11.436! +[2024-08-20 19:46:11,318][03661] Updated weights for policy 0, policy_version 390 (0.0037) +[2024-08-20 19:46:15,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1609728. Throughput: 0: 822.4. Samples: 401386. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:46:15,205][01505] Avg episode reward: [(0, '12.658')] +[2024-08-20 19:46:15,214][03648] Saving new best policy, reward=12.658! +[2024-08-20 19:46:20,202][01505] Fps is (10 sec: 3686.6, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1630208. Throughput: 0: 865.3. Samples: 407376. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:46:20,205][01505] Avg episode reward: [(0, '12.884')] +[2024-08-20 19:46:20,209][03648] Saving new best policy, reward=12.884! +[2024-08-20 19:46:22,251][03661] Updated weights for policy 0, policy_version 400 (0.0034) +[2024-08-20 19:46:25,203][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1646592. Throughput: 0: 889.1. Samples: 410424. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:46:25,205][01505] Avg episode reward: [(0, '12.887')] +[2024-08-20 19:46:25,275][03648] Saving new best policy, reward=12.887! +[2024-08-20 19:46:30,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.2, 300 sec: 3415.6). Total num frames: 1658880. Throughput: 0: 843.8. Samples: 414698. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:46:30,205][01505] Avg episode reward: [(0, '13.123')] +[2024-08-20 19:46:30,208][03648] Saving new best policy, reward=13.123! +[2024-08-20 19:46:35,196][03661] Updated weights for policy 0, policy_version 410 (0.0032) +[2024-08-20 19:46:35,202][01505] Fps is (10 sec: 3276.9, 60 sec: 3481.8, 300 sec: 3443.5). Total num frames: 1679360. Throughput: 0: 837.9. Samples: 419676. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:46:35,209][01505] Avg episode reward: [(0, '13.553')] +[2024-08-20 19:46:35,220][03648] Saving new best policy, reward=13.553! +[2024-08-20 19:46:40,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1699840. Throughput: 0: 863.8. Samples: 422724. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:46:40,205][01505] Avg episode reward: [(0, '13.491')] +[2024-08-20 19:46:45,204][01505] Fps is (10 sec: 3276.4, 60 sec: 3345.0, 300 sec: 3415.6). Total num frames: 1712128. Throughput: 0: 877.3. Samples: 428400. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:46:45,211][01505] Avg episode reward: [(0, '13.821')] +[2024-08-20 19:46:45,223][03648] Saving new best policy, reward=13.821! +[2024-08-20 19:46:47,476][03661] Updated weights for policy 0, policy_version 420 (0.0035) +[2024-08-20 19:46:50,202][01505] Fps is (10 sec: 2457.6, 60 sec: 3345.2, 300 sec: 3415.6). Total num frames: 1724416. Throughput: 0: 821.6. Samples: 432108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:46:50,209][01505] Avg episode reward: [(0, '14.212')] +[2024-08-20 19:46:50,211][03648] Saving new best policy, reward=14.212! +[2024-08-20 19:46:55,203][01505] Fps is (10 sec: 3686.7, 60 sec: 3550.0, 300 sec: 3443.4). Total num frames: 1748992. Throughput: 0: 831.3. Samples: 434996. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:46:55,205][01505] Avg episode reward: [(0, '14.860')] +[2024-08-20 19:46:55,217][03648] Saving new best policy, reward=14.860! +[2024-08-20 19:46:58,313][03661] Updated weights for policy 0, policy_version 430 (0.0025) +[2024-08-20 19:47:00,204][01505] Fps is (10 sec: 4095.3, 60 sec: 3481.5, 300 sec: 3429.5). Total num frames: 1765376. Throughput: 0: 885.6. Samples: 441238. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:47:00,213][01505] Avg episode reward: [(0, '14.305')] +[2024-08-20 19:47:05,202][01505] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3429.6). Total num frames: 1781760. Throughput: 0: 851.7. Samples: 445704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:47:05,209][01505] Avg episode reward: [(0, '14.254')] +[2024-08-20 19:47:10,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3413.4, 300 sec: 3443.5). Total num frames: 1798144. Throughput: 0: 827.5. Samples: 447662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:47:10,209][01505] Avg episode reward: [(0, '15.287')] +[2024-08-20 19:47:10,213][03648] Saving new best policy, reward=15.287! +[2024-08-20 19:47:11,211][03661] Updated weights for policy 0, policy_version 440 (0.0026) +[2024-08-20 19:47:15,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1818624. Throughput: 0: 864.9. Samples: 453620. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:47:15,210][01505] Avg episode reward: [(0, '13.876')] +[2024-08-20 19:47:20,204][01505] Fps is (10 sec: 3685.7, 60 sec: 3413.2, 300 sec: 3443.4). Total num frames: 1835008. Throughput: 0: 881.7. Samples: 459354. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:47:20,209][01505] Avg episode reward: [(0, '14.087')] +[2024-08-20 19:47:22,586][03661] Updated weights for policy 0, policy_version 450 (0.0044) +[2024-08-20 19:47:25,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 1847296. Throughput: 0: 856.5. Samples: 461266. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:47:25,207][01505] Avg episode reward: [(0, '14.885')] +[2024-08-20 19:47:30,202][01505] Fps is (10 sec: 3277.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 1867776. Throughput: 0: 836.5. Samples: 466042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:47:30,210][01505] Avg episode reward: [(0, '15.590')] +[2024-08-20 19:47:30,214][03648] Saving new best policy, reward=15.590! +[2024-08-20 19:47:33,859][03661] Updated weights for policy 0, policy_version 460 (0.0043) +[2024-08-20 19:47:35,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 1888256. Throughput: 0: 893.9. Samples: 472334. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:47:35,205][01505] Avg episode reward: [(0, '16.275')] +[2024-08-20 19:47:35,222][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000461_1888256.pth... +[2024-08-20 19:47:35,386][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000259_1060864.pth +[2024-08-20 19:47:35,400][03648] Saving new best policy, reward=16.275! +[2024-08-20 19:47:40,203][01505] Fps is (10 sec: 3276.7, 60 sec: 3345.0, 300 sec: 3429.5). Total num frames: 1900544. Throughput: 0: 887.6. Samples: 474940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:47:40,206][01505] Avg episode reward: [(0, '17.129')] +[2024-08-20 19:47:40,211][03648] Saving new best policy, reward=17.129! +[2024-08-20 19:47:45,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 1916928. Throughput: 0: 833.8. Samples: 478758. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:47:45,205][01505] Avg episode reward: [(0, '17.410')] +[2024-08-20 19:47:45,212][03648] Saving new best policy, reward=17.410! +[2024-08-20 19:47:47,141][03661] Updated weights for policy 0, policy_version 470 (0.0046) +[2024-08-20 19:47:50,203][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 1937408. Throughput: 0: 857.2. Samples: 484280. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:47:50,208][01505] Avg episode reward: [(0, '17.957')] +[2024-08-20 19:47:50,211][03648] Saving new best policy, reward=17.957! +[2024-08-20 19:47:55,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1957888. Throughput: 0: 883.2. Samples: 487408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:47:55,207][01505] Avg episode reward: [(0, '18.593')] +[2024-08-20 19:47:55,217][03648] Saving new best policy, reward=18.593! +[2024-08-20 19:47:58,529][03661] Updated weights for policy 0, policy_version 480 (0.0035) +[2024-08-20 19:48:00,202][01505] Fps is (10 sec: 3276.9, 60 sec: 3413.4, 300 sec: 3429.6). Total num frames: 1970176. Throughput: 0: 857.2. Samples: 492194. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:48:00,207][01505] Avg episode reward: [(0, '18.648')] +[2024-08-20 19:48:00,209][03648] Saving new best policy, reward=18.648! +[2024-08-20 19:48:05,202][01505] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3429.6). Total num frames: 1982464. Throughput: 0: 824.7. Samples: 496462. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:48:05,207][01505] Avg episode reward: [(0, '17.700')] +[2024-08-20 19:48:10,205][01505] Fps is (10 sec: 3275.8, 60 sec: 3413.2, 300 sec: 3429.5). Total num frames: 2002944. Throughput: 0: 852.3. Samples: 499620. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:48:10,208][01505] Avg episode reward: [(0, '17.037')] +[2024-08-20 19:48:10,243][03661] Updated weights for policy 0, policy_version 490 (0.0016) +[2024-08-20 19:48:15,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 2023424. Throughput: 0: 883.2. Samples: 505786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:48:15,208][01505] Avg episode reward: [(0, '16.754')] +[2024-08-20 19:48:20,202][01505] Fps is (10 sec: 3277.8, 60 sec: 3345.2, 300 sec: 3429.5). Total num frames: 2035712. Throughput: 0: 826.4. Samples: 509524. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:48:20,207][01505] Avg episode reward: [(0, '16.641')] +[2024-08-20 19:48:23,108][03661] Updated weights for policy 0, policy_version 500 (0.0040) +[2024-08-20 19:48:25,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2056192. Throughput: 0: 823.8. Samples: 512012. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:48:25,205][01505] Avg episode reward: [(0, '17.028')] +[2024-08-20 19:48:30,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2076672. Throughput: 0: 880.8. Samples: 518392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:48:30,205][01505] Avg episode reward: [(0, '16.902')] +[2024-08-20 19:48:33,610][03661] Updated weights for policy 0, policy_version 510 (0.0029) +[2024-08-20 19:48:35,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 2088960. Throughput: 0: 868.2. Samples: 523350. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:48:35,209][01505] Avg episode reward: [(0, '18.664')] +[2024-08-20 19:48:35,220][03648] Saving new best policy, reward=18.664! +[2024-08-20 19:48:40,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 2105344. Throughput: 0: 840.1. Samples: 525212. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:48:40,207][01505] Avg episode reward: [(0, '17.686')] +[2024-08-20 19:48:45,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2125824. Throughput: 0: 858.5. Samples: 530826. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:48:45,207][01505] Avg episode reward: [(0, '17.194')] +[2024-08-20 19:48:45,656][03661] Updated weights for policy 0, policy_version 520 (0.0040) +[2024-08-20 19:48:50,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2146304. Throughput: 0: 901.7. Samples: 537040. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:48:50,208][01505] Avg episode reward: [(0, '16.166')] +[2024-08-20 19:48:55,203][01505] Fps is (10 sec: 3276.5, 60 sec: 3345.0, 300 sec: 3429.5). Total num frames: 2158592. Throughput: 0: 875.1. Samples: 538998. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:48:55,206][01505] Avg episode reward: [(0, '17.125')] +[2024-08-20 19:48:58,519][03661] Updated weights for policy 0, policy_version 530 (0.0040) +[2024-08-20 19:49:00,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2174976. Throughput: 0: 832.3. Samples: 543240. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-08-20 19:49:00,207][01505] Avg episode reward: [(0, '16.232')] +[2024-08-20 19:49:05,202][01505] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2195456. Throughput: 0: 888.7. Samples: 549514. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:49:05,204][01505] Avg episode reward: [(0, '15.210')] +[2024-08-20 19:49:09,080][03661] Updated weights for policy 0, policy_version 540 (0.0028) +[2024-08-20 19:49:10,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.8, 300 sec: 3429.5). Total num frames: 2211840. Throughput: 0: 903.0. Samples: 552646. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:49:10,209][01505] Avg episode reward: [(0, '15.791')] +[2024-08-20 19:49:15,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3429.6). Total num frames: 2224128. Throughput: 0: 848.0. Samples: 556552. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:49:15,210][01505] Avg episode reward: [(0, '15.450')] +[2024-08-20 19:49:20,203][01505] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2244608. Throughput: 0: 859.3. Samples: 562018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:49:20,206][01505] Avg episode reward: [(0, '15.189')] +[2024-08-20 19:49:21,304][03661] Updated weights for policy 0, policy_version 550 (0.0035) +[2024-08-20 19:49:25,202][01505] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 2269184. Throughput: 0: 887.2. Samples: 565134. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:49:25,209][01505] Avg episode reward: [(0, '15.725')] +[2024-08-20 19:49:30,202][01505] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2281472. Throughput: 0: 880.2. Samples: 570436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:49:30,206][01505] Avg episode reward: [(0, '16.004')] +[2024-08-20 19:49:34,056][03661] Updated weights for policy 0, policy_version 560 (0.0031) +[2024-08-20 19:49:35,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2297856. Throughput: 0: 834.2. Samples: 574578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:49:35,205][01505] Avg episode reward: [(0, '16.924')] +[2024-08-20 19:49:35,222][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000561_2297856.pth... +[2024-08-20 19:49:35,354][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000359_1470464.pth +[2024-08-20 19:49:40,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 2318336. Throughput: 0: 860.6. Samples: 577724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:49:40,205][01505] Avg episode reward: [(0, '17.867')] +[2024-08-20 19:49:43,882][03661] Updated weights for policy 0, policy_version 570 (0.0038) +[2024-08-20 19:49:45,208][01505] Fps is (10 sec: 3684.2, 60 sec: 3481.3, 300 sec: 3457.2). Total num frames: 2334720. Throughput: 0: 903.7. Samples: 583910. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:49:45,211][01505] Avg episode reward: [(0, '18.057')] +[2024-08-20 19:49:50,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3429.6). Total num frames: 2347008. Throughput: 0: 849.1. Samples: 587724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:49:50,205][01505] Avg episode reward: [(0, '17.894')] +[2024-08-20 19:49:55,202][01505] Fps is (10 sec: 3278.8, 60 sec: 3481.7, 300 sec: 3457.3). Total num frames: 2367488. Throughput: 0: 823.0. Samples: 589682. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:49:55,209][01505] Avg episode reward: [(0, '18.408')] +[2024-08-20 19:49:57,026][03661] Updated weights for policy 0, policy_version 580 (0.0049) +[2024-08-20 19:50:00,203][01505] Fps is (10 sec: 4095.9, 60 sec: 3549.8, 300 sec: 3457.3). Total num frames: 2387968. Throughput: 0: 880.3. Samples: 596164. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:50:00,205][01505] Avg episode reward: [(0, '18.909')] +[2024-08-20 19:50:00,211][03648] Saving new best policy, reward=18.909! +[2024-08-20 19:50:05,204][01505] Fps is (10 sec: 3685.8, 60 sec: 3481.5, 300 sec: 3443.4). Total num frames: 2404352. Throughput: 0: 875.3. Samples: 601406. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:50:05,209][01505] Avg episode reward: [(0, '19.452')] +[2024-08-20 19:50:05,230][03648] Saving new best policy, reward=19.452! +[2024-08-20 19:50:09,762][03661] Updated weights for policy 0, policy_version 590 (0.0049) +[2024-08-20 19:50:10,202][01505] Fps is (10 sec: 2867.3, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2416640. Throughput: 0: 846.8. Samples: 603240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:50:10,205][01505] Avg episode reward: [(0, '20.035')] +[2024-08-20 19:50:10,208][03648] Saving new best policy, reward=20.035! +[2024-08-20 19:50:15,202][01505] Fps is (10 sec: 3277.3, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2437120. Throughput: 0: 844.1. Samples: 608422. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:50:15,205][01505] Avg episode reward: [(0, '21.196')] +[2024-08-20 19:50:15,216][03648] Saving new best policy, reward=21.196! +[2024-08-20 19:50:19,679][03661] Updated weights for policy 0, policy_version 600 (0.0034) +[2024-08-20 19:50:20,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2457600. Throughput: 0: 891.3. Samples: 614688. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:50:20,205][01505] Avg episode reward: [(0, '20.946')] +[2024-08-20 19:50:25,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3429.6). Total num frames: 2469888. Throughput: 0: 870.3. Samples: 616886. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-08-20 19:50:25,206][01505] Avg episode reward: [(0, '20.992')] +[2024-08-20 19:50:30,203][01505] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3443.5). Total num frames: 2486272. Throughput: 0: 819.0. Samples: 620760. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:50:30,210][01505] Avg episode reward: [(0, '20.621')] +[2024-08-20 19:50:32,814][03661] Updated weights for policy 0, policy_version 610 (0.0022) +[2024-08-20 19:50:35,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2506752. Throughput: 0: 874.2. Samples: 627062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:50:35,208][01505] Avg episode reward: [(0, '20.967')] +[2024-08-20 19:50:40,203][01505] Fps is (10 sec: 3686.2, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 2523136. Throughput: 0: 899.6. Samples: 630164. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:50:40,211][01505] Avg episode reward: [(0, '20.878')] +[2024-08-20 19:50:45,062][03661] Updated weights for policy 0, policy_version 620 (0.0019) +[2024-08-20 19:50:45,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.7, 300 sec: 3443.4). Total num frames: 2539520. Throughput: 0: 850.4. Samples: 634430. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:50:45,213][01505] Avg episode reward: [(0, '21.623')] +[2024-08-20 19:50:45,232][03648] Saving new best policy, reward=21.623! +[2024-08-20 19:50:50,202][01505] Fps is (10 sec: 3277.1, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2555904. Throughput: 0: 844.9. Samples: 639424. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:50:50,211][01505] Avg episode reward: [(0, '22.650')] +[2024-08-20 19:50:50,214][03648] Saving new best policy, reward=22.650! +[2024-08-20 19:50:55,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2576384. Throughput: 0: 869.7. Samples: 642378. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:50:55,210][01505] Avg episode reward: [(0, '22.830')] +[2024-08-20 19:50:55,221][03648] Saving new best policy, reward=22.830! +[2024-08-20 19:50:55,905][03661] Updated weights for policy 0, policy_version 630 (0.0025) +[2024-08-20 19:51:00,203][01505] Fps is (10 sec: 3686.2, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2592768. Throughput: 0: 877.8. Samples: 647924. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:51:00,206][01505] Avg episode reward: [(0, '24.076')] +[2024-08-20 19:51:00,213][03648] Saving new best policy, reward=24.076! +[2024-08-20 19:51:05,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.2, 300 sec: 3429.5). Total num frames: 2605056. Throughput: 0: 823.5. Samples: 651744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:51:05,209][01505] Avg episode reward: [(0, '24.347')] +[2024-08-20 19:51:05,219][03648] Saving new best policy, reward=24.347! +[2024-08-20 19:51:08,783][03661] Updated weights for policy 0, policy_version 640 (0.0030) +[2024-08-20 19:51:10,204][01505] Fps is (10 sec: 3276.4, 60 sec: 3481.5, 300 sec: 3443.4). Total num frames: 2625536. Throughput: 0: 838.6. Samples: 654624. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:51:10,211][01505] Avg episode reward: [(0, '23.566')] +[2024-08-20 19:51:15,208][01505] Fps is (10 sec: 4093.6, 60 sec: 3481.3, 300 sec: 3443.3). Total num frames: 2646016. Throughput: 0: 891.0. Samples: 660862. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:51:15,211][01505] Avg episode reward: [(0, '24.045')] +[2024-08-20 19:51:20,205][01505] Fps is (10 sec: 3276.6, 60 sec: 3344.9, 300 sec: 3429.5). Total num frames: 2658304. Throughput: 0: 850.3. Samples: 665326. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:51:20,207][01505] Avg episode reward: [(0, '23.915')] +[2024-08-20 19:51:20,991][03661] Updated weights for policy 0, policy_version 650 (0.0052) +[2024-08-20 19:51:25,202][01505] Fps is (10 sec: 2868.9, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2674688. Throughput: 0: 823.5. Samples: 667220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:51:25,209][01505] Avg episode reward: [(0, '22.560')] +[2024-08-20 19:51:30,202][01505] Fps is (10 sec: 3687.3, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2695168. Throughput: 0: 861.4. Samples: 673194. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:51:30,205][01505] Avg episode reward: [(0, '21.572')] +[2024-08-20 19:51:31,690][03661] Updated weights for policy 0, policy_version 660 (0.0020) +[2024-08-20 19:51:35,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2715648. Throughput: 0: 883.2. Samples: 679166. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:51:35,212][01505] Avg episode reward: [(0, '21.552')] +[2024-08-20 19:51:35,224][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000663_2715648.pth... +[2024-08-20 19:51:35,387][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000461_1888256.pth +[2024-08-20 19:51:40,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 2727936. Throughput: 0: 859.0. Samples: 681034. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:51:40,205][01505] Avg episode reward: [(0, '22.343')] +[2024-08-20 19:51:44,549][03661] Updated weights for policy 0, policy_version 670 (0.0044) +[2024-08-20 19:51:45,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 2744320. Throughput: 0: 840.5. Samples: 685744. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:51:45,208][01505] Avg episode reward: [(0, '21.014')] +[2024-08-20 19:51:50,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2764800. Throughput: 0: 896.6. Samples: 692092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:51:50,206][01505] Avg episode reward: [(0, '21.586')] +[2024-08-20 19:51:55,203][01505] Fps is (10 sec: 3686.1, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2781184. Throughput: 0: 893.5. Samples: 694832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:51:55,208][01505] Avg episode reward: [(0, '20.781')] +[2024-08-20 19:51:55,745][03661] Updated weights for policy 0, policy_version 680 (0.0021) +[2024-08-20 19:52:00,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 2793472. Throughput: 0: 839.4. Samples: 698628. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:52:00,205][01505] Avg episode reward: [(0, '20.828')] +[2024-08-20 19:52:05,203][01505] Fps is (10 sec: 3277.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2813952. Throughput: 0: 864.3. Samples: 704216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:52:05,210][01505] Avg episode reward: [(0, '19.885')] +[2024-08-20 19:52:07,392][03661] Updated weights for policy 0, policy_version 690 (0.0029) +[2024-08-20 19:52:10,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.7, 300 sec: 3443.4). Total num frames: 2834432. Throughput: 0: 892.1. Samples: 707366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:52:10,205][01505] Avg episode reward: [(0, '18.124')] +[2024-08-20 19:52:15,202][01505] Fps is (10 sec: 3686.5, 60 sec: 3413.7, 300 sec: 3443.4). Total num frames: 2850816. Throughput: 0: 867.1. Samples: 712214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:52:15,206][01505] Avg episode reward: [(0, '18.246')] +[2024-08-20 19:52:20,121][03661] Updated weights for policy 0, policy_version 700 (0.0020) +[2024-08-20 19:52:20,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.7, 300 sec: 3457.3). Total num frames: 2867200. Throughput: 0: 836.0. Samples: 716788. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:52:20,205][01505] Avg episode reward: [(0, '18.832')] +[2024-08-20 19:52:25,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2887680. Throughput: 0: 865.9. Samples: 720000. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:52:25,207][01505] Avg episode reward: [(0, '19.140')] +[2024-08-20 19:52:30,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2904064. Throughput: 0: 896.9. Samples: 726104. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:52:30,208][01505] Avg episode reward: [(0, '20.049')] +[2024-08-20 19:52:30,826][03661] Updated weights for policy 0, policy_version 710 (0.0027) +[2024-08-20 19:52:35,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 2916352. Throughput: 0: 839.6. Samples: 729876. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:52:35,204][01505] Avg episode reward: [(0, '20.285')] +[2024-08-20 19:52:40,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2936832. Throughput: 0: 837.5. Samples: 732520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:52:40,207][01505] Avg episode reward: [(0, '21.307')] +[2024-08-20 19:52:42,758][03661] Updated weights for policy 0, policy_version 720 (0.0033) +[2024-08-20 19:52:45,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2957312. Throughput: 0: 896.1. Samples: 738954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:52:45,208][01505] Avg episode reward: [(0, '21.633')] +[2024-08-20 19:52:50,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2973696. Throughput: 0: 878.5. Samples: 743746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:52:50,209][01505] Avg episode reward: [(0, '22.629')] +[2024-08-20 19:52:55,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 2985984. Throughput: 0: 851.5. Samples: 745682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:52:55,206][01505] Avg episode reward: [(0, '22.982')] +[2024-08-20 19:52:55,576][03661] Updated weights for policy 0, policy_version 730 (0.0038) +[2024-08-20 19:53:00,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3006464. Throughput: 0: 867.6. Samples: 751258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:53:00,205][01505] Avg episode reward: [(0, '22.693')] +[2024-08-20 19:53:05,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3026944. Throughput: 0: 904.9. Samples: 757508. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:53:05,209][01505] Avg episode reward: [(0, '22.761')] +[2024-08-20 19:53:05,688][03661] Updated weights for policy 0, policy_version 740 (0.0026) +[2024-08-20 19:53:10,203][01505] Fps is (10 sec: 3276.5, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 3039232. Throughput: 0: 875.2. Samples: 759386. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:53:10,206][01505] Avg episode reward: [(0, '22.898')] +[2024-08-20 19:53:15,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3055616. Throughput: 0: 834.1. Samples: 763638. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:53:15,205][01505] Avg episode reward: [(0, '22.616')] +[2024-08-20 19:53:18,287][03661] Updated weights for policy 0, policy_version 750 (0.0035) +[2024-08-20 19:53:20,202][01505] Fps is (10 sec: 3686.7, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3076096. Throughput: 0: 889.7. Samples: 769914. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:53:20,205][01505] Avg episode reward: [(0, '21.201')] +[2024-08-20 19:53:25,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3096576. Throughput: 0: 900.6. Samples: 773046. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:53:25,205][01505] Avg episode reward: [(0, '21.857')] +[2024-08-20 19:53:30,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3108864. Throughput: 0: 843.6. Samples: 776916. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:53:30,210][01505] Avg episode reward: [(0, '21.720')] +[2024-08-20 19:53:31,308][03661] Updated weights for policy 0, policy_version 760 (0.0025) +[2024-08-20 19:53:35,203][01505] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3129344. Throughput: 0: 857.5. Samples: 782332. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:53:35,210][01505] Avg episode reward: [(0, '23.216')] +[2024-08-20 19:53:35,223][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000764_3129344.pth... +[2024-08-20 19:53:35,369][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000561_2297856.pth +[2024-08-20 19:53:40,204][01505] Fps is (10 sec: 4095.2, 60 sec: 3549.8, 300 sec: 3471.2). Total num frames: 3149824. Throughput: 0: 883.0. Samples: 785418. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:53:40,210][01505] Avg episode reward: [(0, '23.502')] +[2024-08-20 19:53:41,153][03661] Updated weights for policy 0, policy_version 770 (0.0045) +[2024-08-20 19:53:45,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 3162112. Throughput: 0: 873.7. Samples: 790576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:53:45,206][01505] Avg episode reward: [(0, '22.542')] +[2024-08-20 19:53:50,202][01505] Fps is (10 sec: 2867.7, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3178496. Throughput: 0: 828.0. Samples: 794770. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:53:50,209][01505] Avg episode reward: [(0, '23.039')] +[2024-08-20 19:53:53,874][03661] Updated weights for policy 0, policy_version 780 (0.0041) +[2024-08-20 19:53:55,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3198976. Throughput: 0: 856.6. Samples: 797932. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:53:55,211][01505] Avg episode reward: [(0, '22.699')] +[2024-08-20 19:54:00,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3215360. Throughput: 0: 900.1. Samples: 804142. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:54:00,207][01505] Avg episode reward: [(0, '22.995')] +[2024-08-20 19:54:05,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 3227648. Throughput: 0: 846.7. Samples: 808016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:54:05,208][01505] Avg episode reward: [(0, '22.453')] +[2024-08-20 19:54:06,984][03661] Updated weights for policy 0, policy_version 790 (0.0030) +[2024-08-20 19:54:10,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.7, 300 sec: 3471.2). Total num frames: 3248128. Throughput: 0: 823.8. Samples: 810118. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:54:10,205][01505] Avg episode reward: [(0, '21.673')] +[2024-08-20 19:54:15,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3268608. Throughput: 0: 880.0. Samples: 816518. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:54:15,205][01505] Avg episode reward: [(0, '22.506')] +[2024-08-20 19:54:16,822][03661] Updated weights for policy 0, policy_version 800 (0.0031) +[2024-08-20 19:54:20,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 3284992. Throughput: 0: 879.2. Samples: 821894. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:54:20,205][01505] Avg episode reward: [(0, '21.896')] +[2024-08-20 19:54:25,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 3297280. Throughput: 0: 854.9. Samples: 823886. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:54:25,205][01505] Avg episode reward: [(0, '21.935')] +[2024-08-20 19:54:29,686][03661] Updated weights for policy 0, policy_version 810 (0.0019) +[2024-08-20 19:54:30,203][01505] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3317760. Throughput: 0: 851.6. Samples: 828900. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:54:30,208][01505] Avg episode reward: [(0, '21.313')] +[2024-08-20 19:54:35,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3338240. Throughput: 0: 899.0. Samples: 835226. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:54:35,207][01505] Avg episode reward: [(0, '22.887')] +[2024-08-20 19:54:40,202][01505] Fps is (10 sec: 3686.5, 60 sec: 3413.4, 300 sec: 3457.4). Total num frames: 3354624. Throughput: 0: 880.1. Samples: 837536. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:54:40,204][01505] Avg episode reward: [(0, '23.246')] +[2024-08-20 19:54:41,924][03661] Updated weights for policy 0, policy_version 820 (0.0043) +[2024-08-20 19:54:45,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3366912. Throughput: 0: 827.3. Samples: 841372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:54:45,210][01505] Avg episode reward: [(0, '24.724')] +[2024-08-20 19:54:45,223][03648] Saving new best policy, reward=24.724! +[2024-08-20 19:54:50,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3387392. Throughput: 0: 879.8. Samples: 847608. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:54:50,209][01505] Avg episode reward: [(0, '24.047')] +[2024-08-20 19:54:52,433][03661] Updated weights for policy 0, policy_version 830 (0.0038) +[2024-08-20 19:54:55,203][01505] Fps is (10 sec: 4095.9, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3407872. Throughput: 0: 901.1. Samples: 850668. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:54:55,214][01505] Avg episode reward: [(0, '23.693')] +[2024-08-20 19:55:00,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 3420160. Throughput: 0: 852.7. Samples: 854890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:55:00,205][01505] Avg episode reward: [(0, '23.530')] +[2024-08-20 19:55:05,202][01505] Fps is (10 sec: 2867.3, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3436544. Throughput: 0: 839.3. Samples: 859662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:55:05,211][01505] Avg episode reward: [(0, '22.352')] +[2024-08-20 19:55:05,510][03661] Updated weights for policy 0, policy_version 840 (0.0023) +[2024-08-20 19:55:10,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3457024. Throughput: 0: 864.9. Samples: 862806. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:55:10,204][01505] Avg episode reward: [(0, '22.638')] +[2024-08-20 19:55:15,204][01505] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 3473408. Throughput: 0: 882.8. Samples: 868626. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2024-08-20 19:55:15,207][01505] Avg episode reward: [(0, '22.170')] +[2024-08-20 19:55:17,410][03661] Updated weights for policy 0, policy_version 850 (0.0065) +[2024-08-20 19:55:20,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 3485696. Throughput: 0: 827.6. Samples: 872468. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:55:20,208][01505] Avg episode reward: [(0, '22.524')] +[2024-08-20 19:55:25,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3510272. Throughput: 0: 843.1. Samples: 875474. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:55:25,205][01505] Avg episode reward: [(0, '23.235')] +[2024-08-20 19:55:28,267][03661] Updated weights for policy 0, policy_version 860 (0.0028) +[2024-08-20 19:55:30,202][01505] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3530752. Throughput: 0: 895.3. Samples: 881660. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:55:30,204][01505] Avg episode reward: [(0, '25.199')] +[2024-08-20 19:55:30,214][03648] Saving new best policy, reward=25.199! +[2024-08-20 19:55:35,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3543040. Throughput: 0: 852.2. Samples: 885958. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:55:35,207][01505] Avg episode reward: [(0, '24.590')] +[2024-08-20 19:55:35,224][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000865_3543040.pth... +[2024-08-20 19:55:35,432][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000663_2715648.pth +[2024-08-20 19:55:40,202][01505] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 3555328. Throughput: 0: 824.6. Samples: 887776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:55:40,205][01505] Avg episode reward: [(0, '24.270')] +[2024-08-20 19:55:41,376][03661] Updated weights for policy 0, policy_version 870 (0.0044) +[2024-08-20 19:55:45,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3575808. Throughput: 0: 866.1. Samples: 893864. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:55:45,211][01505] Avg episode reward: [(0, '24.814')] +[2024-08-20 19:55:50,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3596288. Throughput: 0: 888.6. Samples: 899648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:55:50,204][01505] Avg episode reward: [(0, '25.000')] +[2024-08-20 19:55:52,729][03661] Updated weights for policy 0, policy_version 880 (0.0017) +[2024-08-20 19:55:55,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 3608576. Throughput: 0: 862.4. Samples: 901612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:55:55,210][01505] Avg episode reward: [(0, '24.157')] +[2024-08-20 19:56:00,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 3629056. Throughput: 0: 835.8. Samples: 906236. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:56:00,205][01505] Avg episode reward: [(0, '22.479')] +[2024-08-20 19:56:04,182][03661] Updated weights for policy 0, policy_version 890 (0.0019) +[2024-08-20 19:56:05,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3649536. Throughput: 0: 887.8. Samples: 912420. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:56:05,210][01505] Avg episode reward: [(0, '23.379')] +[2024-08-20 19:56:10,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.5). Total num frames: 3661824. Throughput: 0: 880.6. Samples: 915100. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:56:10,205][01505] Avg episode reward: [(0, '23.223')] +[2024-08-20 19:56:15,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3678208. Throughput: 0: 829.5. Samples: 918988. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:56:15,210][01505] Avg episode reward: [(0, '23.522')] +[2024-08-20 19:56:17,066][03661] Updated weights for policy 0, policy_version 900 (0.0026) +[2024-08-20 19:56:20,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3698688. Throughput: 0: 865.0. Samples: 924882. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:56:20,210][01505] Avg episode reward: [(0, '23.702')] +[2024-08-20 19:56:25,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 3719168. Throughput: 0: 895.9. Samples: 928090. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:56:25,209][01505] Avg episode reward: [(0, '24.878')] +[2024-08-20 19:56:28,121][03661] Updated weights for policy 0, policy_version 910 (0.0018) +[2024-08-20 19:56:30,203][01505] Fps is (10 sec: 3276.7, 60 sec: 3345.0, 300 sec: 3443.4). Total num frames: 3731456. Throughput: 0: 862.1. Samples: 932658. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-08-20 19:56:30,211][01505] Avg episode reward: [(0, '24.505')] +[2024-08-20 19:56:35,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3747840. Throughput: 0: 837.2. Samples: 937320. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:56:35,211][01505] Avg episode reward: [(0, '23.346')] +[2024-08-20 19:56:39,630][03661] Updated weights for policy 0, policy_version 920 (0.0029) +[2024-08-20 19:56:40,203][01505] Fps is (10 sec: 3686.4, 60 sec: 3549.8, 300 sec: 3471.2). Total num frames: 3768320. Throughput: 0: 864.3. Samples: 940508. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:56:40,205][01505] Avg episode reward: [(0, '24.019')] +[2024-08-20 19:56:45,205][01505] Fps is (10 sec: 3685.4, 60 sec: 3481.4, 300 sec: 3457.3). Total num frames: 3784704. Throughput: 0: 895.8. Samples: 946548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:56:45,212][01505] Avg episode reward: [(0, '23.696')] +[2024-08-20 19:56:50,202][01505] Fps is (10 sec: 3277.0, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3801088. Throughput: 0: 844.2. Samples: 950410. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-08-20 19:56:50,205][01505] Avg episode reward: [(0, '23.212')] +[2024-08-20 19:56:52,528][03661] Updated weights for policy 0, policy_version 930 (0.0037) +[2024-08-20 19:56:55,202][01505] Fps is (10 sec: 3277.7, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 3817472. Throughput: 0: 840.8. Samples: 952934. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-08-20 19:56:55,211][01505] Avg episode reward: [(0, '21.766')] +[2024-08-20 19:57:00,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 3837952. Throughput: 0: 892.4. Samples: 959148. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:57:00,208][01505] Avg episode reward: [(0, '22.952')] +[2024-08-20 19:57:03,262][03661] Updated weights for policy 0, policy_version 940 (0.0026) +[2024-08-20 19:57:05,202][01505] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 3854336. Throughput: 0: 869.9. Samples: 964028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:57:05,205][01505] Avg episode reward: [(0, '24.525')] +[2024-08-20 19:57:10,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 3866624. Throughput: 0: 840.4. Samples: 965908. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:57:10,209][01505] Avg episode reward: [(0, '24.202')] +[2024-08-20 19:57:15,202][01505] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3887104. Throughput: 0: 864.3. Samples: 971552. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:57:15,210][01505] Avg episode reward: [(0, '24.451')] +[2024-08-20 19:57:15,369][03661] Updated weights for policy 0, policy_version 950 (0.0050) +[2024-08-20 19:57:20,202][01505] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 3907584. Throughput: 0: 899.2. Samples: 977786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:57:20,212][01505] Avg episode reward: [(0, '24.123')] +[2024-08-20 19:57:25,207][01505] Fps is (10 sec: 3275.2, 60 sec: 3344.8, 300 sec: 3443.4). Total num frames: 3919872. Throughput: 0: 870.4. Samples: 979678. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-08-20 19:57:25,210][01505] Avg episode reward: [(0, '23.956')] +[2024-08-20 19:57:28,062][03661] Updated weights for policy 0, policy_version 960 (0.0048) +[2024-08-20 19:57:30,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3457.3). Total num frames: 3936256. Throughput: 0: 831.1. Samples: 983944. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-08-20 19:57:30,205][01505] Avg episode reward: [(0, '22.512')] +[2024-08-20 19:57:35,203][01505] Fps is (10 sec: 4098.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3960832. Throughput: 0: 886.8. Samples: 990316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:57:35,205][01505] Avg episode reward: [(0, '22.566')] +[2024-08-20 19:57:35,221][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000967_3960832.pth... +[2024-08-20 19:57:35,347][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000764_3129344.pth +[2024-08-20 19:57:38,570][03661] Updated weights for policy 0, policy_version 970 (0.0026) +[2024-08-20 19:57:40,205][01505] Fps is (10 sec: 4094.8, 60 sec: 3481.5, 300 sec: 3457.3). Total num frames: 3977216. Throughput: 0: 896.2. Samples: 993266. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-08-20 19:57:40,212][01505] Avg episode reward: [(0, '22.999')] +[2024-08-20 19:57:45,202][01505] Fps is (10 sec: 2867.2, 60 sec: 3413.5, 300 sec: 3443.4). Total num frames: 3989504. Throughput: 0: 843.3. Samples: 997096. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-08-20 19:57:45,208][01505] Avg episode reward: [(0, '23.139')] +[2024-08-20 19:57:48,984][03648] Stopping Batcher_0... +[2024-08-20 19:57:48,985][03648] Loop batcher_evt_loop terminating... +[2024-08-20 19:57:48,986][01505] Component Batcher_0 stopped! +[2024-08-20 19:57:48,998][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-08-20 19:57:49,070][03661] Weights refcount: 2 0 +[2024-08-20 19:57:49,079][01505] Component InferenceWorker_p0-w0 stopped! +[2024-08-20 19:57:49,086][03661] Stopping InferenceWorker_p0-w0... +[2024-08-20 19:57:49,087][03661] Loop inference_proc0-0_evt_loop terminating... +[2024-08-20 19:57:49,161][03648] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000865_3543040.pth +[2024-08-20 19:57:49,176][03648] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-08-20 19:57:49,232][01505] Component RolloutWorker_w4 stopped! +[2024-08-20 19:57:49,239][03666] Stopping RolloutWorker_w4... +[2024-08-20 19:57:49,241][03666] Loop rollout_proc4_evt_loop terminating... +[2024-08-20 19:57:49,247][01505] Component RolloutWorker_w2 stopped! +[2024-08-20 19:57:49,254][03664] Stopping RolloutWorker_w2... +[2024-08-20 19:57:49,258][01505] Component RolloutWorker_w6 stopped! +[2024-08-20 19:57:49,264][03668] Stopping RolloutWorker_w6... +[2024-08-20 19:57:49,255][03664] Loop rollout_proc2_evt_loop terminating... +[2024-08-20 19:57:49,265][03668] Loop rollout_proc6_evt_loop terminating... +[2024-08-20 19:57:49,289][01505] Component RolloutWorker_w0 stopped! +[2024-08-20 19:57:49,295][03662] Stopping RolloutWorker_w0... +[2024-08-20 19:57:49,296][03662] Loop rollout_proc0_evt_loop terminating... +[2024-08-20 19:57:49,415][03648] Stopping LearnerWorker_p0... +[2024-08-20 19:57:49,415][03648] Loop learner_proc0_evt_loop terminating... +[2024-08-20 19:57:49,416][01505] Component LearnerWorker_p0 stopped! +[2024-08-20 19:57:49,593][01505] Component RolloutWorker_w3 stopped! +[2024-08-20 19:57:49,595][03665] Stopping RolloutWorker_w3... +[2024-08-20 19:57:49,597][03665] Loop rollout_proc3_evt_loop terminating... +[2024-08-20 19:57:49,610][01505] Component RolloutWorker_w7 stopped! +[2024-08-20 19:57:49,612][03669] Stopping RolloutWorker_w7... +[2024-08-20 19:57:49,617][03669] Loop rollout_proc7_evt_loop terminating... +[2024-08-20 19:57:49,645][01505] Component RolloutWorker_w5 stopped! +[2024-08-20 19:57:49,647][03667] Stopping RolloutWorker_w5... +[2024-08-20 19:57:49,647][03667] Loop rollout_proc5_evt_loop terminating... +[2024-08-20 19:57:49,653][01505] Component RolloutWorker_w1 stopped! +[2024-08-20 19:57:49,656][01505] Waiting for process learner_proc0 to stop... +[2024-08-20 19:57:49,658][03663] Stopping RolloutWorker_w1... +[2024-08-20 19:57:49,663][03663] Loop rollout_proc1_evt_loop terminating... +[2024-08-20 19:57:50,880][01505] Waiting for process inference_proc0-0 to join... +[2024-08-20 19:57:50,885][01505] Waiting for process rollout_proc0 to join... +[2024-08-20 19:57:52,904][01505] Waiting for process rollout_proc1 to join... +[2024-08-20 19:57:52,959][01505] Waiting for process rollout_proc2 to join... +[2024-08-20 19:57:52,970][01505] Waiting for process rollout_proc3 to join... +[2024-08-20 19:57:52,975][01505] Waiting for process rollout_proc4 to join... +[2024-08-20 19:57:52,982][01505] Waiting for process rollout_proc5 to join... +[2024-08-20 19:57:52,987][01505] Waiting for process rollout_proc6 to join... +[2024-08-20 19:57:52,995][01505] Waiting for process rollout_proc7 to join... +[2024-08-20 19:57:53,001][01505] Batcher 0 profile tree view: +batching: 30.6752, releasing_batches: 0.0294 +[2024-08-20 19:57:53,007][01505] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0049 + wait_policy_total: 406.2439 +update_model: 10.9448 + weight_update: 0.0052 +one_step: 0.0025 + handle_policy_step: 712.4682 + deserialize: 17.5885, stack: 3.5964, obs_to_device_normalize: 141.7811, forward: 385.0134, send_messages: 33.9896 + prepare_outputs: 95.6235 + to_cpu: 55.7291 +[2024-08-20 19:57:53,012][01505] Learner 0 profile tree view: +misc: 0.0104, prepare_batch: 14.5309 +train: 77.8873 + epoch_init: 0.0063, minibatch_init: 0.0089, losses_postprocess: 0.7256, kl_divergence: 0.7973, after_optimizer: 34.1242 + calculate_losses: 29.0326 + losses_init: 0.0122, forward_head: 1.5624, bptt_initial: 19.2236, tail: 1.3265, advantages_returns: 0.3214, losses: 3.9633 + bptt: 2.2986 + bptt_forward_core: 2.1688 + update: 12.4319 + clip: 1.0305 +[2024-08-20 19:57:53,016][01505] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.4489, enqueue_policy_requests: 108.4394, env_step: 914.2360, overhead: 16.3545, complete_rollouts: 8.4363 +save_policy_outputs: 24.7832 + split_output_tensors: 9.8831 +[2024-08-20 19:57:53,020][01505] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4413, enqueue_policy_requests: 108.1771, env_step: 911.9239, overhead: 16.4426, complete_rollouts: 7.2148 +save_policy_outputs: 24.1980 + split_output_tensors: 9.3642 +[2024-08-20 19:57:53,023][01505] Loop Runner_EvtLoop terminating... +[2024-08-20 19:57:53,025][01505] Runner profile tree view: +main_loop: 1210.4011 +[2024-08-20 19:57:53,026][01505] Collected {0: 4005888}, FPS: 3309.6 +[2024-08-20 19:57:53,568][01505] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-08-20 19:57:53,570][01505] Overriding arg 'num_workers' with value 1 passed from command line +[2024-08-20 19:57:53,573][01505] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-08-20 19:57:53,575][01505] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-08-20 19:57:53,577][01505] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-08-20 19:57:53,580][01505] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-08-20 19:57:53,582][01505] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-08-20 19:57:53,583][01505] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-08-20 19:57:53,584][01505] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-08-20 19:57:53,585][01505] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-08-20 19:57:53,586][01505] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-08-20 19:57:53,587][01505] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-08-20 19:57:53,589][01505] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-08-20 19:57:53,591][01505] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-08-20 19:57:53,592][01505] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-08-20 19:57:53,642][01505] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-08-20 19:57:53,648][01505] RunningMeanStd input shape: (3, 72, 128) +[2024-08-20 19:57:53,651][01505] RunningMeanStd input shape: (1,) +[2024-08-20 19:57:53,672][01505] ConvEncoder: input_channels=3 +[2024-08-20 19:57:53,790][01505] Conv encoder output size: 512 +[2024-08-20 19:57:53,791][01505] Policy head output size: 512 +[2024-08-20 19:57:53,976][01505] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-08-20 19:57:54,818][01505] Num frames 100... +[2024-08-20 19:57:54,951][01505] Num frames 200... +[2024-08-20 19:57:55,080][01505] Num frames 300... +[2024-08-20 19:57:55,217][01505] Num frames 400... +[2024-08-20 19:57:55,354][01505] Num frames 500... +[2024-08-20 19:57:55,490][01505] Num frames 600... +[2024-08-20 19:57:55,656][01505] Num frames 700... +[2024-08-20 19:57:55,841][01505] Num frames 800... +[2024-08-20 19:57:56,026][01505] Num frames 900... +[2024-08-20 19:57:56,213][01505] Num frames 1000... +[2024-08-20 19:57:56,403][01505] Num frames 1100... +[2024-08-20 19:57:56,584][01505] Num frames 1200... +[2024-08-20 19:57:56,773][01505] Num frames 1300... +[2024-08-20 19:57:56,949][01505] Num frames 1400... +[2024-08-20 19:57:57,137][01505] Num frames 1500... +[2024-08-20 19:57:57,334][01505] Num frames 1600... +[2024-08-20 19:57:57,537][01505] Num frames 1700... +[2024-08-20 19:57:57,728][01505] Num frames 1800... +[2024-08-20 19:57:57,917][01505] Num frames 1900... +[2024-08-20 19:57:58,114][01505] Num frames 2000... +[2024-08-20 19:57:58,321][01505] Num frames 2100... +[2024-08-20 19:57:58,377][01505] Avg episode rewards: #0: 62.999, true rewards: #0: 21.000 +[2024-08-20 19:57:58,379][01505] Avg episode reward: 62.999, avg true_objective: 21.000 +[2024-08-20 19:57:58,512][01505] Num frames 2200... +[2024-08-20 19:57:58,650][01505] Num frames 2300... +[2024-08-20 19:57:58,781][01505] Num frames 2400... +[2024-08-20 19:57:58,941][01505] Num frames 2500... +[2024-08-20 19:57:59,073][01505] Num frames 2600... +[2024-08-20 19:57:59,199][01505] Num frames 2700... +[2024-08-20 19:57:59,326][01505] Num frames 2800... +[2024-08-20 19:57:59,467][01505] Num frames 2900... +[2024-08-20 19:57:59,571][01505] Avg episode rewards: #0: 41.659, true rewards: #0: 14.660 +[2024-08-20 19:57:59,573][01505] Avg episode reward: 41.659, avg true_objective: 14.660 +[2024-08-20 19:57:59,663][01505] Num frames 3000... +[2024-08-20 19:57:59,797][01505] Num frames 3100... +[2024-08-20 19:57:59,930][01505] Num frames 3200... +[2024-08-20 19:58:00,070][01505] Num frames 3300... +[2024-08-20 19:58:00,200][01505] Num frames 3400... +[2024-08-20 19:58:00,332][01505] Num frames 3500... +[2024-08-20 19:58:00,474][01505] Num frames 3600... +[2024-08-20 19:58:00,614][01505] Num frames 3700... +[2024-08-20 19:58:00,749][01505] Num frames 3800... +[2024-08-20 19:58:00,882][01505] Avg episode rewards: #0: 34.533, true rewards: #0: 12.867 +[2024-08-20 19:58:00,883][01505] Avg episode reward: 34.533, avg true_objective: 12.867 +[2024-08-20 19:58:00,938][01505] Num frames 3900... +[2024-08-20 19:58:01,071][01505] Num frames 4000... +[2024-08-20 19:58:01,202][01505] Num frames 4100... +[2024-08-20 19:58:01,334][01505] Num frames 4200... +[2024-08-20 19:58:01,477][01505] Num frames 4300... +[2024-08-20 19:58:01,620][01505] Num frames 4400... +[2024-08-20 19:58:01,751][01505] Num frames 4500... +[2024-08-20 19:58:01,886][01505] Num frames 4600... +[2024-08-20 19:58:02,017][01505] Num frames 4700... +[2024-08-20 19:58:02,148][01505] Num frames 4800... +[2024-08-20 19:58:02,282][01505] Num frames 4900... +[2024-08-20 19:58:02,418][01505] Num frames 5000... +[2024-08-20 19:58:02,566][01505] Num frames 5100... +[2024-08-20 19:58:02,703][01505] Num frames 5200... +[2024-08-20 19:58:02,836][01505] Num frames 5300... +[2024-08-20 19:58:02,976][01505] Num frames 5400... +[2024-08-20 19:58:03,109][01505] Num frames 5500... +[2024-08-20 19:58:03,245][01505] Num frames 5600... +[2024-08-20 19:58:03,380][01505] Num frames 5700... +[2024-08-20 19:58:03,524][01505] Num frames 5800... +[2024-08-20 19:58:03,665][01505] Num frames 5900... +[2024-08-20 19:58:03,800][01505] Avg episode rewards: #0: 40.399, true rewards: #0: 14.900 +[2024-08-20 19:58:03,802][01505] Avg episode reward: 40.399, avg true_objective: 14.900 +[2024-08-20 19:58:03,859][01505] Num frames 6000... +[2024-08-20 19:58:03,990][01505] Num frames 6100... +[2024-08-20 19:58:04,119][01505] Num frames 6200... +[2024-08-20 19:58:04,250][01505] Num frames 6300... +[2024-08-20 19:58:04,383][01505] Num frames 6400... +[2024-08-20 19:58:04,525][01505] Num frames 6500... +[2024-08-20 19:58:04,668][01505] Num frames 6600... +[2024-08-20 19:58:04,797][01505] Num frames 6700... +[2024-08-20 19:58:04,925][01505] Num frames 6800... +[2024-08-20 19:58:05,057][01505] Num frames 6900... +[2024-08-20 19:58:05,185][01505] Num frames 7000... +[2024-08-20 19:58:05,317][01505] Num frames 7100... +[2024-08-20 19:58:05,451][01505] Num frames 7200... +[2024-08-20 19:58:05,598][01505] Num frames 7300... +[2024-08-20 19:58:05,736][01505] Num frames 7400... +[2024-08-20 19:58:05,867][01505] Num frames 7500... +[2024-08-20 19:58:05,998][01505] Num frames 7600... +[2024-08-20 19:58:06,135][01505] Num frames 7700... +[2024-08-20 19:58:06,273][01505] Num frames 7800... +[2024-08-20 19:58:06,409][01505] Num frames 7900... +[2024-08-20 19:58:06,548][01505] Num frames 8000... +[2024-08-20 19:58:06,697][01505] Avg episode rewards: #0: 44.719, true rewards: #0: 16.120 +[2024-08-20 19:58:06,698][01505] Avg episode reward: 44.719, avg true_objective: 16.120 +[2024-08-20 19:58:06,757][01505] Num frames 8100... +[2024-08-20 19:58:06,889][01505] Num frames 8200... +[2024-08-20 19:58:07,023][01505] Num frames 8300... +[2024-08-20 19:58:07,155][01505] Num frames 8400... +[2024-08-20 19:58:07,286][01505] Num frames 8500... +[2024-08-20 19:58:07,417][01505] Num frames 8600... +[2024-08-20 19:58:07,521][01505] Avg episode rewards: #0: 38.893, true rewards: #0: 14.393 +[2024-08-20 19:58:07,523][01505] Avg episode reward: 38.893, avg true_objective: 14.393 +[2024-08-20 19:58:07,624][01505] Num frames 8700... +[2024-08-20 19:58:07,759][01505] Num frames 8800... +[2024-08-20 19:58:07,896][01505] Num frames 8900... +[2024-08-20 19:58:08,026][01505] Num frames 9000... +[2024-08-20 19:58:08,160][01505] Num frames 9100... +[2024-08-20 19:58:08,290][01505] Num frames 9200... +[2024-08-20 19:58:08,437][01505] Num frames 9300... +[2024-08-20 19:58:08,631][01505] Num frames 9400... +[2024-08-20 19:58:08,821][01505] Avg episode rewards: #0: 36.240, true rewards: #0: 13.526 +[2024-08-20 19:58:08,823][01505] Avg episode reward: 36.240, avg true_objective: 13.526 +[2024-08-20 19:58:08,888][01505] Num frames 9500... +[2024-08-20 19:58:09,062][01505] Num frames 9600... +[2024-08-20 19:58:09,244][01505] Num frames 9700... +[2024-08-20 19:58:09,422][01505] Num frames 9800... +[2024-08-20 19:58:09,611][01505] Num frames 9900... +[2024-08-20 19:58:09,813][01505] Num frames 10000... +[2024-08-20 19:58:10,006][01505] Num frames 10100... +[2024-08-20 19:58:10,187][01505] Num frames 10200... +[2024-08-20 19:58:10,380][01505] Avg episode rewards: #0: 33.838, true rewards: #0: 12.839 +[2024-08-20 19:58:10,383][01505] Avg episode reward: 33.838, avg true_objective: 12.839 +[2024-08-20 19:58:10,446][01505] Num frames 10300... +[2024-08-20 19:58:10,647][01505] Num frames 10400... +[2024-08-20 19:58:10,858][01505] Num frames 10500... +[2024-08-20 19:58:11,025][01505] Num frames 10600... +[2024-08-20 19:58:11,154][01505] Num frames 10700... +[2024-08-20 19:58:11,290][01505] Num frames 10800... +[2024-08-20 19:58:11,419][01505] Num frames 10900... +[2024-08-20 19:58:11,557][01505] Num frames 11000... +[2024-08-20 19:58:11,690][01505] Num frames 11100... +[2024-08-20 19:58:11,833][01505] Num frames 11200... +[2024-08-20 19:58:12,013][01505] Avg episode rewards: #0: 32.105, true rewards: #0: 12.550 +[2024-08-20 19:58:12,014][01505] Avg episode reward: 32.105, avg true_objective: 12.550 +[2024-08-20 19:58:12,025][01505] Num frames 11300... +[2024-08-20 19:58:12,158][01505] Num frames 11400... +[2024-08-20 19:58:12,291][01505] Num frames 11500... +[2024-08-20 19:58:12,425][01505] Num frames 11600... +[2024-08-20 19:58:12,567][01505] Num frames 11700... +[2024-08-20 19:58:12,701][01505] Num frames 11800... +[2024-08-20 19:58:12,842][01505] Num frames 11900... +[2024-08-20 19:58:12,976][01505] Num frames 12000... +[2024-08-20 19:58:13,109][01505] Num frames 12100... +[2024-08-20 19:58:13,239][01505] Num frames 12200... +[2024-08-20 19:58:13,386][01505] Num frames 12300... +[2024-08-20 19:58:13,518][01505] Num frames 12400... +[2024-08-20 19:58:13,645][01505] Avg episode rewards: #0: 31.547, true rewards: #0: 12.447 +[2024-08-20 19:58:13,647][01505] Avg episode reward: 31.547, avg true_objective: 12.447 +[2024-08-20 19:59:39,797][01505] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-08-20 20:05:24,818][01505] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-08-20 20:05:24,819][01505] Overriding arg 'num_workers' with value 1 passed from command line +[2024-08-20 20:05:24,822][01505] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-08-20 20:05:24,824][01505] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-08-20 20:05:24,827][01505] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-08-20 20:05:24,829][01505] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-08-20 20:05:24,831][01505] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-08-20 20:05:24,833][01505] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-08-20 20:05:24,835][01505] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-08-20 20:05:24,837][01505] Adding new argument 'hf_repository'='gubhaalimpu/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-08-20 20:05:24,838][01505] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-08-20 20:05:24,839][01505] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-08-20 20:05:24,840][01505] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-08-20 20:05:24,841][01505] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-08-20 20:05:24,843][01505] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-08-20 20:05:24,872][01505] RunningMeanStd input shape: (3, 72, 128) +[2024-08-20 20:05:24,874][01505] RunningMeanStd input shape: (1,) +[2024-08-20 20:05:24,889][01505] ConvEncoder: input_channels=3 +[2024-08-20 20:05:24,938][01505] Conv encoder output size: 512 +[2024-08-20 20:05:24,942][01505] Policy head output size: 512 +[2024-08-20 20:05:24,966][01505] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-08-20 20:05:25,433][01505] Num frames 100... +[2024-08-20 20:05:25,587][01505] Num frames 200... +[2024-08-20 20:05:25,738][01505] Num frames 300... +[2024-08-20 20:05:25,873][01505] Num frames 400... +[2024-08-20 20:05:26,018][01505] Num frames 500... +[2024-08-20 20:05:26,148][01505] Num frames 600... +[2024-08-20 20:05:26,284][01505] Num frames 700... +[2024-08-20 20:05:26,457][01505] Avg episode rewards: #0: 16.820, true rewards: #0: 7.820 +[2024-08-20 20:05:26,458][01505] Avg episode reward: 16.820, avg true_objective: 7.820 +[2024-08-20 20:05:26,485][01505] Num frames 800... +[2024-08-20 20:05:26,627][01505] Num frames 900... +[2024-08-20 20:05:26,760][01505] Num frames 1000... +[2024-08-20 20:05:26,901][01505] Num frames 1100... +[2024-08-20 20:05:27,042][01505] Num frames 1200... +[2024-08-20 20:05:27,179][01505] Num frames 1300... +[2024-08-20 20:05:27,325][01505] Num frames 1400... +[2024-08-20 20:05:27,461][01505] Num frames 1500... +[2024-08-20 20:05:27,606][01505] Num frames 1600... +[2024-08-20 20:05:27,738][01505] Num frames 1700... +[2024-08-20 20:05:27,873][01505] Num frames 1800... +[2024-08-20 20:05:28,004][01505] Num frames 1900... +[2024-08-20 20:05:28,144][01505] Num frames 2000... +[2024-08-20 20:05:28,278][01505] Num frames 2100... +[2024-08-20 20:05:28,450][01505] Avg episode rewards: #0: 25.950, true rewards: #0: 10.950 +[2024-08-20 20:05:28,452][01505] Avg episode reward: 25.950, avg true_objective: 10.950 +[2024-08-20 20:05:28,467][01505] Num frames 2200... +[2024-08-20 20:05:28,607][01505] Num frames 2300... +[2024-08-20 20:05:28,744][01505] Num frames 2400... +[2024-08-20 20:05:28,873][01505] Num frames 2500... +[2024-08-20 20:05:29,012][01505] Num frames 2600... +[2024-08-20 20:05:29,151][01505] Num frames 2700... +[2024-08-20 20:05:29,279][01505] Num frames 2800... +[2024-08-20 20:05:29,413][01505] Num frames 2900... +[2024-08-20 20:05:29,590][01505] Avg episode rewards: #0: 21.967, true rewards: #0: 9.967 +[2024-08-20 20:05:29,593][01505] Avg episode reward: 21.967, avg true_objective: 9.967 +[2024-08-20 20:05:29,610][01505] Num frames 3000... +[2024-08-20 20:05:29,744][01505] Num frames 3100... +[2024-08-20 20:05:29,880][01505] Num frames 3200... +[2024-08-20 20:05:30,014][01505] Num frames 3300... +[2024-08-20 20:05:30,154][01505] Num frames 3400... +[2024-08-20 20:05:30,285][01505] Num frames 3500... +[2024-08-20 20:05:30,431][01505] Avg episode rewards: #0: 19.915, true rewards: #0: 8.915 +[2024-08-20 20:05:30,433][01505] Avg episode reward: 19.915, avg true_objective: 8.915 +[2024-08-20 20:05:30,479][01505] Num frames 3600... +[2024-08-20 20:05:30,653][01505] Num frames 3700... +[2024-08-20 20:05:30,789][01505] Num frames 3800... +[2024-08-20 20:05:30,923][01505] Num frames 3900... +[2024-08-20 20:05:31,051][01505] Num frames 4000... +[2024-08-20 20:05:31,217][01505] Num frames 4100... +[2024-08-20 20:05:31,417][01505] Num frames 4200... +[2024-08-20 20:05:31,609][01505] Num frames 4300... +[2024-08-20 20:05:31,794][01505] Num frames 4400... +[2024-08-20 20:05:31,982][01505] Num frames 4500... +[2024-08-20 20:05:32,166][01505] Num frames 4600... +[2024-08-20 20:05:32,349][01505] Num frames 4700... +[2024-08-20 20:05:32,528][01505] Num frames 4800... +[2024-08-20 20:05:32,613][01505] Avg episode rewards: #0: 21.228, true rewards: #0: 9.628 +[2024-08-20 20:05:32,615][01505] Avg episode reward: 21.228, avg true_objective: 9.628 +[2024-08-20 20:05:32,776][01505] Num frames 4900... +[2024-08-20 20:05:32,967][01505] Num frames 5000... +[2024-08-20 20:05:33,157][01505] Num frames 5100... +[2024-08-20 20:05:33,346][01505] Num frames 5200... +[2024-08-20 20:05:33,538][01505] Num frames 5300... +[2024-08-20 20:05:33,728][01505] Num frames 5400... +[2024-08-20 20:05:33,915][01505] Num frames 5500... +[2024-08-20 20:05:34,106][01505] Num frames 5600... +[2024-08-20 20:05:34,266][01505] Num frames 5700... +[2024-08-20 20:05:34,403][01505] Num frames 5800... +[2024-08-20 20:05:34,547][01505] Num frames 5900... +[2024-08-20 20:05:34,680][01505] Num frames 6000... +[2024-08-20 20:05:34,814][01505] Num frames 6100... +[2024-08-20 20:05:34,945][01505] Num frames 6200... +[2024-08-20 20:05:35,072][01505] Num frames 6300... +[2024-08-20 20:05:35,201][01505] Num frames 6400... +[2024-08-20 20:05:35,340][01505] Num frames 6500... +[2024-08-20 20:05:35,474][01505] Num frames 6600... +[2024-08-20 20:05:35,619][01505] Num frames 6700... +[2024-08-20 20:05:35,759][01505] Num frames 6800... +[2024-08-20 20:05:35,894][01505] Num frames 6900... +[2024-08-20 20:05:35,972][01505] Avg episode rewards: #0: 27.190, true rewards: #0: 11.523 +[2024-08-20 20:05:35,974][01505] Avg episode reward: 27.190, avg true_objective: 11.523 +[2024-08-20 20:05:36,087][01505] Num frames 7000... +[2024-08-20 20:05:36,219][01505] Num frames 7100... +[2024-08-20 20:05:36,362][01505] Num frames 7200... +[2024-08-20 20:05:36,494][01505] Num frames 7300... +[2024-08-20 20:05:36,643][01505] Num frames 7400... +[2024-08-20 20:05:36,775][01505] Num frames 7500... +[2024-08-20 20:05:36,910][01505] Num frames 7600... +[2024-08-20 20:05:37,041][01505] Num frames 7700... +[2024-08-20 20:05:37,174][01505] Num frames 7800... +[2024-08-20 20:05:37,318][01505] Num frames 7900... +[2024-08-20 20:05:37,460][01505] Num frames 8000... +[2024-08-20 20:05:37,601][01505] Num frames 8100... +[2024-08-20 20:05:37,740][01505] Num frames 8200... +[2024-08-20 20:05:37,872][01505] Num frames 8300... +[2024-08-20 20:05:38,007][01505] Num frames 8400... +[2024-08-20 20:05:38,141][01505] Num frames 8500... +[2024-08-20 20:05:38,273][01505] Num frames 8600... +[2024-08-20 20:05:38,414][01505] Num frames 8700... +[2024-08-20 20:05:38,551][01505] Num frames 8800... +[2024-08-20 20:05:38,689][01505] Num frames 8900... +[2024-08-20 20:05:38,830][01505] Num frames 9000... +[2024-08-20 20:05:38,905][01505] Avg episode rewards: #0: 30.877, true rewards: #0: 12.877 +[2024-08-20 20:05:38,907][01505] Avg episode reward: 30.877, avg true_objective: 12.877 +[2024-08-20 20:05:39,021][01505] Num frames 9100... +[2024-08-20 20:05:39,153][01505] Num frames 9200... +[2024-08-20 20:05:39,283][01505] Num frames 9300... +[2024-08-20 20:05:39,425][01505] Num frames 9400... +[2024-08-20 20:05:39,571][01505] Num frames 9500... +[2024-08-20 20:05:39,707][01505] Num frames 9600... +[2024-08-20 20:05:39,841][01505] Num frames 9700... +[2024-08-20 20:05:39,976][01505] Num frames 9800... +[2024-08-20 20:05:40,109][01505] Num frames 9900... +[2024-08-20 20:05:40,244][01505] Num frames 10000... +[2024-08-20 20:05:40,389][01505] Num frames 10100... +[2024-08-20 20:05:40,532][01505] Num frames 10200... +[2024-08-20 20:05:40,670][01505] Num frames 10300... +[2024-08-20 20:05:40,810][01505] Num frames 10400... +[2024-08-20 20:05:40,983][01505] Avg episode rewards: #0: 31.607, true rewards: #0: 13.107 +[2024-08-20 20:05:40,985][01505] Avg episode reward: 31.607, avg true_objective: 13.107 +[2024-08-20 20:05:41,006][01505] Num frames 10500... +[2024-08-20 20:05:41,135][01505] Num frames 10600... +[2024-08-20 20:05:41,272][01505] Num frames 10700... +[2024-08-20 20:05:41,415][01505] Num frames 10800... +[2024-08-20 20:05:41,554][01505] Num frames 10900... +[2024-08-20 20:05:41,692][01505] Num frames 11000... +[2024-08-20 20:05:41,826][01505] Num frames 11100... +[2024-08-20 20:05:42,051][01505] Num frames 11200... +[2024-08-20 20:05:42,337][01505] Num frames 11300... +[2024-08-20 20:05:42,677][01505] Num frames 11400... +[2024-08-20 20:05:42,813][01505] Num frames 11500... +[2024-08-20 20:05:43,033][01505] Num frames 11600... +[2024-08-20 20:05:43,257][01505] Num frames 11700... +[2024-08-20 20:05:43,560][01505] Num frames 11800... +[2024-08-20 20:05:43,862][01505] Num frames 11900... +[2024-08-20 20:05:43,996][01505] Num frames 12000... +[2024-08-20 20:05:44,134][01505] Num frames 12100... +[2024-08-20 20:05:44,317][01505] Num frames 12200... +[2024-08-20 20:05:44,521][01505] Num frames 12300... +[2024-08-20 20:05:44,717][01505] Num frames 12400... +[2024-08-20 20:05:44,904][01505] Num frames 12500... +[2024-08-20 20:05:45,123][01505] Avg episode rewards: #0: 34.095, true rewards: #0: 13.984 +[2024-08-20 20:05:45,125][01505] Avg episode reward: 34.095, avg true_objective: 13.984 +[2024-08-20 20:05:45,153][01505] Num frames 12600... +[2024-08-20 20:05:45,330][01505] Num frames 12700... +[2024-08-20 20:05:45,509][01505] Num frames 12800... +[2024-08-20 20:05:45,719][01505] Num frames 12900... +[2024-08-20 20:05:45,916][01505] Num frames 13000... +[2024-08-20 20:05:46,102][01505] Num frames 13100... +[2024-08-20 20:05:46,294][01505] Num frames 13200... +[2024-08-20 20:05:46,490][01505] Num frames 13300... +[2024-08-20 20:05:46,685][01505] Num frames 13400... +[2024-08-20 20:05:46,913][01505] Avg episode rewards: #0: 32.382, true rewards: #0: 13.482 +[2024-08-20 20:05:46,916][01505] Avg episode reward: 32.382, avg true_objective: 13.482 +[2024-08-20 20:07:26,573][01505] Replay video saved to /content/train_dir/default_experiment/replay.mp4!