Spaces:

acozma
/

CS581-Algos-Demo

Sleeping

Andrei Cozma committed on Apr 19, 2023

Commit

370ef8f

1 Parent(s): 00b3e73

Updates

Files changed (2) hide show

MonteCarloAgent.py CHANGED Viewed

@@ -207,10 +207,16 @@ def main():
     parser.add_argument(
         "--max_steps",
         type=int,
-        default=500,
         help="The maximum number of steps per episode before the episode is forced to end. (default: 500)",
     )
     ### Agent parameters
     parser.add_argument(
         "--gamma",
@@ -221,7 +227,7 @@ def main():
     parser.add_argument(
         "--epsilon",
         type=float,
-        default=0.1,
         help="The value for the epsilon-greedy policy to use. (default: 0.1)",
     )
@@ -295,7 +301,8 @@ def main():
                 max_steps=args.max_steps,
                 log_wandb=args.wandb_project is not None,
             )
-            mca.save_policy(fname=f"policy_{run_name}.npy")
         elif args.test is not None:
             if not args.test.endswith(".npy"):
                 args.test += ".npy"

     parser.add_argument(
         "--max_steps",
         type=int,
+        default=250,
         help="The maximum number of steps per episode before the episode is forced to end. (default: 500)",
     )
+    parser.add_argument(
+        "--no_save",
+        action="store_true",
+        help="Use this flag to disable saving the policy.",
+    )
     ### Agent parameters
     parser.add_argument(
         "--gamma",
     parser.add_argument(
         "--epsilon",
         type=float,
+        default=0.5,
         help="The value for the epsilon-greedy policy to use. (default: 0.1)",
     )
                 max_steps=args.max_steps,
                 log_wandb=args.wandb_project is not None,
             )
+            if not args.no_save:
+                mca.save_policy(fname=f"policy_{run_name}.npy")
         elif args.test is not None:
             if not args.test.endswith(".npy"):
                 args.test += ".npy"

run_tests.py CHANGED Viewed

@@ -2,14 +2,14 @@ import os
 import multiprocessing
 vals_eps = [0.1, 0.25, 0.5, 0.75, 0.9]
-vals_gamma = [1.0, 0.97, 0.95, 0.9, 0.75, 0.5]
 num_tests = 10
 def run_test(args):
     os.system(
-        f"python3 MonteCarloAgent.py --train  --gamma {args[0]} --epsilon {args[1]} --wandb_project cs581 --wandb_job_type params --wandb_run_name_suffix {args[2]}"
     )

 import multiprocessing
 vals_eps = [0.1, 0.25, 0.5, 0.75, 0.9]
+vals_gamma = [1.0, 0.99, 0.98, 0.97, 0.95]
 num_tests = 10
 def run_test(args):
     os.system(
+        f"python3 MonteCarloAgent.py --train  --gamma {args[0]} --epsilon {args[1]} --wandb_project cs581 --wandb_job_type params --wandb_run_name_suffix {args[2]} --no_save"
     )