# Experiment "caribou": trains PPO on the `src.caribou.s3a2` environment.
# NOTE(review): the env/run/stop/config layout looks like a Ray RLlib/Tune
# experiment file -- confirm against the launcher that loads it.
caribou:
  env: src.caribou.s3a2
  run: PPO

  # Stopping criterion: total-time budget (presumably seconds, per the `_s`
  # suffix; 24000 s = 6 h 40 min).
  stop:
    time_total_s: 24000

  config:
    # PPO loss / optimisation hyperparameters.
    lambda: 0.95
    kl_coeff: 0.5
    clip_param: 0.2
    vf_clip_param: 400.0
    entropy_coeff: 0.0001
    num_sgd_iter: 10
    lr: 0.0003

    # Sampling and batching. The two batch-size overrides below are
    # intentionally left commented out, as in the original file.
    # train_batch_size: 64000
    rollout_fragment_length: auto
    # sgd_minibatch_size: 16000
    num_envs_per_worker: 24
    min_time_s_per_iteration: 30

    # Run with Learner- and RLModule API (new stack).
    _enable_learner_api: true
    _enable_rl_module_api: true

    # Use 2 Learner workers, each with 1 GPU.
    num_learner_workers: 2
    num_gpus_per_learner_worker: 1
    num_gpus: 0  # No GPU needed for driver.

    # Since we are using learner workers, the driver process does not need
    # a CPU in particular.
    num_cpus_for_local_worker: 1

    # Need to unset this b/c we are using the RLModule API, which provides
    # exploration control via the RLModule's `forward_exploration` method.
    exploration_config: {}