# Experiment "asm": runs PPO on the ecorl.envs.asm.AsmEnv environment.
# NOTE(review): nesting was reconstructed from the usual RLlib experiment
# layout (name -> env/run/stop/config); confirm against the consumer's schema.
asm:
  env: ecorl.envs.asm.AsmEnv
  run: PPO
  # Stop criterion for the whole run (presumably wall-clock seconds; confirm).
  stop:
    time_total_s: 24000
  config:
    # PPO hyperparameters.
    lambda: 0.95
    kl_coeff: 0.5
    clip_param: 0.2
    vf_clip_param: 400.0
    entropy_coeff: 0.0001
    rollout_fragment_length: auto
    num_sgd_iter: 10
    num_envs_per_worker: 24
    min_time_s_per_iteration: 360
    lr: 0.0003
    # Run with the Learner and RLModule APIs (new stack).
    _enable_learner_api: true
    _enable_rl_module_api: true
    # Use N learner workers, each with its own GPU.
    num_learner_workers: 2
    num_gpus_per_learner_worker: 1
    num_gpus: 0  # No GPU needed for the driver.
    # Since we are using learner workers, the driver process does not need
    # a CPU in particular.
    num_cpus_for_local_worker: 1
    # Need to unset this because we are using the RLModule API, which
    # provides exploration control via the RLModule's `forward_exploration`
    # method.
    exploration_config: {}