Spaces:
Sleeping
Sleeping
Andrei Cozma
committed on
Commit
·
370ef8f
1
Parent(s):
00b3e73
Updates
Browse files
- MonteCarloAgent.py +10 -3
- run_tests.py +2 -2
MonteCarloAgent.py
CHANGED
@@ -207,10 +207,16 @@ def main():
|
|
207 |
parser.add_argument(
|
208 |
"--max_steps",
|
209 |
type=int,
|
210 |
-
default=
|
211 |
help="The maximum number of steps per episode before the episode is forced to end. (default: 500)",
|
212 |
)
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
### Agent parameters
|
215 |
parser.add_argument(
|
216 |
"--gamma",
|
@@ -221,7 +227,7 @@ def main():
|
|
221 |
parser.add_argument(
|
222 |
"--epsilon",
|
223 |
type=float,
|
224 |
-
default=0.
|
225 |
help="The value for the epsilon-greedy policy to use. (default: 0.1)",
|
226 |
)
|
227 |
|
@@ -295,7 +301,8 @@ def main():
|
|
295 |
max_steps=args.max_steps,
|
296 |
log_wandb=args.wandb_project is not None,
|
297 |
)
|
298 |
-
|
|
|
299 |
elif args.test is not None:
|
300 |
if not args.test.endswith(".npy"):
|
301 |
args.test += ".npy"
|
|
|
207 |
parser.add_argument(
|
208 |
"--max_steps",
|
209 |
type=int,
|
210 |
+
default=250,
|
211 |
help="The maximum number of steps per episode before the episode is forced to end. (default: 500)",
|
212 |
)
|
213 |
|
214 |
+
parser.add_argument(
|
215 |
+
"--no_save",
|
216 |
+
action="store_true",
|
217 |
+
help="Use this flag to disable saving the policy.",
|
218 |
+
)
|
219 |
+
|
220 |
### Agent parameters
|
221 |
parser.add_argument(
|
222 |
"--gamma",
|
|
|
227 |
parser.add_argument(
|
228 |
"--epsilon",
|
229 |
type=float,
|
230 |
+
default=0.5,
|
231 |
help="The value for the epsilon-greedy policy to use. (default: 0.1)",
|
232 |
)
|
233 |
|
|
|
301 |
max_steps=args.max_steps,
|
302 |
log_wandb=args.wandb_project is not None,
|
303 |
)
|
304 |
+
if not args.no_save:
|
305 |
+
mca.save_policy(fname=f"policy_{run_name}.npy")
|
306 |
elif args.test is not None:
|
307 |
if not args.test.endswith(".npy"):
|
308 |
args.test += ".npy"
|
run_tests.py
CHANGED
@@ -2,14 +2,14 @@ import os
|
|
2 |
import multiprocessing
|
3 |
|
4 |
vals_eps = [0.1, 0.25, 0.5, 0.75, 0.9]
|
5 |
-
vals_gamma = [1.0, 0.
|
6 |
|
7 |
num_tests = 10
|
8 |
|
9 |
|
10 |
def run_test(args):
|
11 |
os.system(
|
12 |
-
f"python3 MonteCarloAgent.py --train --gamma {args[0]} --epsilon {args[1]} --wandb_project cs581 --wandb_job_type params --wandb_run_name_suffix {args[2]}"
|
13 |
)
|
14 |
|
15 |
|
|
|
2 |
import multiprocessing
|
3 |
|
4 |
vals_eps = [0.1, 0.25, 0.5, 0.75, 0.9]
|
5 |
+
vals_gamma = [1.0, 0.99, 0.98, 0.97, 0.95]
|
6 |
|
7 |
num_tests = 10
|
8 |
|
9 |
|
10 |
def run_test(args):
|
11 |
os.system(
|
12 |
+
f"python3 MonteCarloAgent.py --train --gamma {args[0]} --epsilon {args[1]} --wandb_project cs581 --wandb_job_type params --wandb_run_name_suffix {args[2]} --no_save"
|
13 |
)
|
14 |
|
15 |
|