# rl4eco/sb3/hyperpars/ppo-caribou.yml
# stable-baselines3 configuration
# algo: "PPO"
# env_id: "CaribouScipy"
# config: {}
# n_envs: 12
# tensorboard: "../../../logs"
# total_timesteps: 1000000
# use_sde: True
# repo: "boettiger-lab/rl4eco"
# save_path: "../saved_agents"
# id: "3"
algo: "PPO"
total_timesteps: 5000000
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  policy_kwargs: "dict(net_arch=[64, 32])"
# policy_kwargs: "dict(net_arch=[256, 128])"
# batch_size: 512
# gamma: 0.9999
# learning_rate: !!float 7.77e-05
# ent_coef: 0.00429
# clip_range: 0.1
# gae_lambda: 0.9
# max_grad_norm: 5
# vf_coef: 0.19
# use_sde: True
# policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
# in policy_kwargs: net_arch=[400, 300]
# policy: 'MlpPolicy'
# use_sde: True
# policy_kwargs: "dict(log_std_init=-3, net_arch=[400, 300])"
# clip_range: 0.1
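#
# How these keys plausibly map onto the SB3 PPO constructor -- a minimal sketch,
# assuming the training script yaml.safe_load()s this file into `cfg` and
# eval()s the policy_kwargs string (the actual loader lives in rl4eco):
#
#   import yaml
#   from stable_baselines3 import PPO
#
#   cfg = yaml.safe_load(open("ppo-caribou.yml"))
#   ac = dict(cfg["algo_config"])
#   ac["policy_kwargs"] = eval(ac["policy_kwargs"])  # "dict(net_arch=[64, 32])" -> dict
#   model = PPO(ac.pop("policy"), env, use_sde=cfg["use_sde"], **ac)  # env: see the env sketch below
#   model.learn(total_timesteps=cfg["total_timesteps"])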
# env
env_id: "CaribouScipy"
# Passed to the environment constructor: override default parameter values here
# instead of editing the defaults in caribou_ode.py.
config:
  Tmax: 800
  budget: 50
n_envs: 12
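#
# A minimal sketch of how env_id, config, and n_envs would typically combine via
# SB3's make_vec_env (assuming "CaribouScipy" is registered as a Gymnasium env id
# by rl4eco; `cfg` is the loaded YAML dict from the sketch above):
#
#   from stable_baselines3.common.env_util import make_vec_env
#
#   env = make_vec_env(
#       cfg["env_id"],                 # "CaribouScipy"
#       n_envs=cfg["n_envs"],          # 12 parallel copies
#       env_kwargs=cfg.get("config"),  # {"Tmax": 800, "budget": 50} overrides
#   )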
tensorboard: "/home/rstudio/logs"
#total_timesteps: 50000
total_timesteps: 10000000
use_sde: True
repo: "boettiger-lab/rl4eco"
save_path: "../saved_agents"
id: "LFupdate_shortTF1strat"
# Alternative algorithms (see the sketch below):
# rppo - PPO with recurrent memory; should beat plain PPO but is harder to train
# td3 - currently failing with unexplained errors
# tqc - outperforms PPO if given enough training time
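#
# Sketch of how the algo string could select among these alternatives; the mapping
# keys are assumptions, but RecurrentPPO and TQC do ship in sb3_contrib (note that
# RecurrentPPO also needs a recurrent policy such as "MlpLstmPolicy"):
#
#   from stable_baselines3 import PPO, TD3
#   from sb3_contrib import RecurrentPPO, TQC
#
#   ALGOS = {"PPO": PPO, "TD3": TD3, "RPPO": RecurrentPPO, "TQC": TQC}
#   AlgoCls = ALGOS[cfg["algo"]]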
# misc
additional_imports: ["torch"]
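#
# One plausible way the training script could honor additional_imports (the
# mechanism is an assumption; "torch" is the only module listed above):
#
#   import importlib
#   for name in cfg.get("additional_imports", []):
#       globals()[name] = importlib.import_module(name)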