Andrei Cozma committed on
Commit
370ef8f
·
1 Parent(s): 00b3e73
Files changed (2) hide show
  1. MonteCarloAgent.py +10 -3
  2. run_tests.py +2 -2
MonteCarloAgent.py CHANGED
@@ -207,10 +207,16 @@ def main():
207
  parser.add_argument(
208
  "--max_steps",
209
  type=int,
210
- default=500,
211
  help="The maximum number of steps per episode before the episode is forced to end. (default: 500)",
212
  )
213
 
 
 
 
 
 
 
214
  ### Agent parameters
215
  parser.add_argument(
216
  "--gamma",
@@ -221,7 +227,7 @@ def main():
221
  parser.add_argument(
222
  "--epsilon",
223
  type=float,
224
- default=0.1,
225
  help="The value for the epsilon-greedy policy to use. (default: 0.1)",
226
  )
227
 
@@ -295,7 +301,8 @@ def main():
295
  max_steps=args.max_steps,
296
  log_wandb=args.wandb_project is not None,
297
  )
298
- mca.save_policy(fname=f"policy_{run_name}.npy")
 
299
  elif args.test is not None:
300
  if not args.test.endswith(".npy"):
301
  args.test += ".npy"
 
207
  parser.add_argument(
208
  "--max_steps",
209
  type=int,
210
+ default=250,
211
  help="The maximum number of steps per episode before the episode is forced to end. (default: 500)",
212
  )
213
 
214
+ parser.add_argument(
215
+ "--no_save",
216
+ action="store_true",
217
+ help="Use this flag to disable saving the policy.",
218
+ )
219
+
220
  ### Agent parameters
221
  parser.add_argument(
222
  "--gamma",
 
227
  parser.add_argument(
228
  "--epsilon",
229
  type=float,
230
+ default=0.5,
231
  help="The value for the epsilon-greedy policy to use. (default: 0.1)",
232
  )
233
 
 
301
  max_steps=args.max_steps,
302
  log_wandb=args.wandb_project is not None,
303
  )
304
+ if not args.no_save:
305
+ mca.save_policy(fname=f"policy_{run_name}.npy")
306
  elif args.test is not None:
307
  if not args.test.endswith(".npy"):
308
  args.test += ".npy"
run_tests.py CHANGED
@@ -2,14 +2,14 @@ import os
2
  import multiprocessing
3
 
4
  vals_eps = [0.1, 0.25, 0.5, 0.75, 0.9]
5
- vals_gamma = [1.0, 0.97, 0.95, 0.9, 0.75, 0.5]
6
 
7
  num_tests = 10
8
 
9
 
10
  def run_test(args):
11
  os.system(
12
- f"python3 MonteCarloAgent.py --train --gamma {args[0]} --epsilon {args[1]} --wandb_project cs581 --wandb_job_type params --wandb_run_name_suffix {args[2]}"
13
  )
14
 
15
 
 
2
  import multiprocessing
3
 
4
  vals_eps = [0.1, 0.25, 0.5, 0.75, 0.9]
5
+ vals_gamma = [1.0, 0.99, 0.98, 0.97, 0.95]
6
 
7
  num_tests = 10
8
 
9
 
10
  def run_test(args):
11
  os.system(
12
+ f"python3 MonteCarloAgent.py --train --gamma {args[0]} --epsilon {args[1]} --wandb_project cs581 --wandb_job_type params --wandb_run_name_suffix {args[2]} --no_save"
13
  )
14
 
15