# stable-baselines3 configuration
# algo: "PPO"
# env_id: "CaribouScipy"
# config: {}
# n_envs: 12
# tensorboard: "../../../logs"
# total_timesteps: 1000000
# use_sde: True
# repo: "boettiger-lab/rl4eco"
# save_path: "../saved_agents"
# id: "3"
algo: "PPO" | |
total_timesteps: 5000000 | |
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  policy_kwargs: "dict(net_arch=[64, 32])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # use_sde: True
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # in policy_kwargs: net_arch=[400, 300]
  # policy: 'MlpPolicy'
  # use_sde: True
  # policy_kwargs: "dict(log_std_init=-3, net_arch=[400, 300])"
  # clip_range: 0.1
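  # How these keys are presumably consumed (a sketch; it assumes the training
  # script forwards algo_config entries as PPO keyword arguments, which is not
  # verified here):
  #   from stable_baselines3 import PPO
  #   model = PPO(policy="MlpPolicy", env=env,
  #               tensorboard_log="../../../logs",
  #               policy_kwargs=dict(net_arch=[64, 32]))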
# env
env_id: "CaribouScipy"
# Passed to the environment constructor; override parameter defaults here
# instead of editing the values defined in caribou_ode.py.
config:
  Tmax: 800
  budget: 50
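# A sketch of how config is presumably applied (the constructor call is
# illustrative; the actual wiring lives in the rl4eco training code):
#   env = CaribouScipy(config=dict(Tmax=800, budget=50))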
n_envs: 12
tensorboard: "/home/rstudio/logs"
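# Inspect training curves with, e.g.: tensorboard --logdir /home/rstudio/logs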
# total_timesteps: 50000
total_timesteps: 10000000
use_sde: True
repo: "boettiger-lab/rl4eco"
save_path: "../saved_agents"
id: "LFupdate_shortTF1strat"
# rppo - recurrent PPO with memory (should outperform plain PPO, but is harder to train)
# td3 - currently failing with unexplained errors
# tqc - performs better than PPO if given enough training time
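# A commented sketch for switching algorithms (assumes the training script can
# resolve algorithm names from sb3-contrib; untested here):
# algo: "RecurrentPPO"
# algo_config:
#   policy: 'MlpLstmPolicy'  # recurrent policies need the LSTM policy class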
# misc
additional_imports: ["torch"]
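# Presumably imported before string-valued fields like policy_kwargs are
# evaluated; roughly (a sketch, not the actual rl4eco loader):
#   import torch
#   policy_kwargs = eval(cfg["algo_config"]["policy_kwargs"])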