# stable-baselines3 configuration
# algo: "PPO"
# env_id: "CaribouScipy"
# config: {}
# n_envs: 12
# tensorboard: "../../../logs"
# total_timesteps: 1000000
# use_sde: True
# repo: "boettiger-lab/rl4eco"
# save_path: "../saved_agents"
# id: "3"
algo: "PPO" | |
total_timesteps: 5000000 | |
algo_config:
  tensorboard_log: "../../../logs"
  #
  policy: 'MlpPolicy'
  policy_kwargs: "dict(net_arch=[64, 32])"
  # policy_kwargs: "dict(net_arch=[256, 128])"
  # batch_size: 512
  # gamma: 0.9999
  # learning_rate: !!float 7.77e-05
  # ent_coef: 0.00429
  # clip_range: 0.1
  # gae_lambda: 0.9
  # max_grad_norm: 5
  # vf_coef: 0.19
  # use_sde: True
  # policy_kwargs: "dict(log_std_init=-3.29, ortho_init=False, net_arch=[256, 128])"
  # in policy_kwargs: net_arch=[400, 300]
  # policy: 'MlpPolicy'
  # use_sde: True
  # policy_kwargs: "dict(log_std_init=-3, net_arch=[400, 300])"
  # clip_range: 0.1
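  # How these keys are presumably consumed (a sketch; it assumes the training
  # script forwards algo_config entries as PPO keyword arguments, which is not
  # verified here):
  #   from stable_baselines3 import PPO
  #   model = PPO(policy="MlpPolicy", env=env,
  #               tensorboard_log="../../../logs",
  #               policy_kwargs=dict(net_arch=[64, 32]))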
# env
env_id: "CaribouScipy"
# Passed to the environment constructor; override parameter defaults here
# instead of editing the values defined in caribou_ode.py.
config:
  Tmax: 800
  budget: 50
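# A sketch of how config is presumably applied (the constructor call is
# illustrative; the actual wiring lives in the rl4eco training code):
#   env = CaribouScipy(config=dict(Tmax=800, budget=50))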
n_envs: 12
tensorboard: "/home/rstudio/logs"
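# Inspect training curves with, e.g.: tensorboard --logdir /home/rstudio/logs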
# total_timesteps: 50000
total_timesteps: 10000000
use_sde: True
repo: "boettiger-lab/rl4eco"
save_path: "../saved_agents"
id: "LFupdate_shortTF1strat"
# rppo - recurrent PPO with memory (should outperform plain PPO, but is harder to train)
# td3 - currently failing with unexplained errors
# tqc - performs better than PPO if given enough training time
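# A commented sketch for switching algorithms (assumes the training script can
# resolve algorithm names from sb3-contrib; untested here):
# algo: "RecurrentPPO"
# algo_config:
#   policy: 'MlpLstmPolicy'  # recurrent policies need the LSTM policy class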
# misc
additional_imports: ["torch"]
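# Presumably imported before string-valued fields like policy_kwargs are
# evaluated; roughly (a sketch, not the actual rl4eco loader):
#   import torch
#   policy_kwargs = eval(cfg["algo_config"]["policy_kwargs"])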