# rl-ecology/rllib/ppo-caribou.yaml
# cboettig's picture
# and so it begins...
# 026bc1a
# PPO experiment spec for the caribou environment.
# Layout: <experiment name> -> env / run / stop / config.
caribou:
  env: src.caribou.s3a2
  run: PPO
  # Stopping criteria for the run.
  stop:
    time_total_s: 24000
  # Algorithm settings passed to PPO.
  config:
    # PPO loss / optimisation hyperparameters.
    lambda: 0.95
    kl_coeff: 0.5
    clip_param: 0.2
    vf_clip_param: 400.0
    entropy_coeff: 0.0001
    # train_batch_size: 64000
    rollout_fragment_length: auto
    # sgd_minibatch_size: 16000
    num_sgd_iter: 10
    num_envs_per_worker: 24
    min_time_s_per_iteration: 30
    lr: 0.0003
    # Run with the Learner and RLModule APIs (new stack).
    _enable_learner_api: true
    _enable_rl_module_api: true
    # Use N learner workers, one GPU each.
    num_learner_workers: 2
    num_gpus_per_learner_worker: 1
    num_gpus: 0  # No GPU needed for the driver.
    # Since we are using learner workers, the driver process does not need
    # more than a single CPU.
    num_cpus_for_local_worker: 1
    # Must be unset because we are using the RLModule API, which provides
    # exploration control via the RLModule's `forward_exploration` method.
    exploration_config: {}