Spaces:
Sleeping
Sleeping
import os | |
import multiprocessing | |
import random | |
wandb_project = "cs581" | |
env = "CliffWalking-v0" | |
n_train_episodes = 2500 | |
max_steps = 200 | |
num_tests = 10 | |
vals_update_type = [ | |
"first_visit" | |
] # Every visit takes too long due to this environment's reward structure | |
vals_epsilon = [0.1, 0.2, 0.3, 0.4, 0.5] | |
vals_gamma = [1.0, 0.98, 0.96, 0.94] | |
def run_test(args): | |
os.system( | |
f"python3 MonteCarloAgent.py --train --n_train_episodes {n_train_episodes} --max_steps {max_steps} --env {env} --gamma {args[0]} --epsilon {args[1]} --update_type {args[2]} --wandb_project {wandb_project} --wandb_run_name_suffix {args[3]} --no_save" | |
) | |
with multiprocessing.Pool(16) as p: | |
tests = [] | |
for update_type in vals_update_type: | |
for gamma in vals_gamma: | |
for eps in vals_epsilon: | |
tests.extend((gamma, eps, update_type, i) for i in range(num_tests)) | |
random.shuffle(tests) | |
p.map(run_test, tests) | |