Andrei Cozma committed on
Commit
99ac186
·
1 Parent(s): f6cf5f2
Files changed (4) hide show
  1. MCAgent.py +11 -8
  2. README.md +2 -2
  3. Shared.py +13 -10
  4. run.py +3 -2
MCAgent.py CHANGED
@@ -4,8 +4,8 @@ from Shared import Shared
4
  import wandb
5
  from Shared import Shared
6
 
 
7
  class MCAgent(Shared):
8
-
9
  def __init__(self, /, **kwargs):
10
  super().__init__(**kwargs)
11
  self.reset()
@@ -126,12 +126,6 @@ class MCAgent(Shared):
126
  # Test the agent every test_every episodes with the greedy policy (by default)
127
  if e % test_every == 0:
128
  test_success_rate = self.test(verbose=False, **kwargs)
129
- if save_best and test_success_rate > 0.9:
130
- if self.run_name is None:
131
- print(f"Warning: run_name is None, not saving best policy")
132
- else:
133
- self.save_policy(self.run_name, save_best_dir)
134
-
135
  if log_wandb:
136
  self.wandb_log_img(episode=e)
137
 
@@ -145,6 +139,16 @@ class MCAgent(Shared):
145
  if log_wandb:
146
  wandb.log(stats)
147
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  def wandb_log_img(self, episode=None):
150
  caption_suffix = "Initial" if episode is None else f"After Episode {episode}"
@@ -160,4 +164,3 @@ class MCAgent(Shared):
160
  ),
161
  }
162
  )
163
-
 
4
  import wandb
5
  from Shared import Shared
6
 
7
+
8
  class MCAgent(Shared):
 
9
  def __init__(self, /, **kwargs):
10
  super().__init__(**kwargs)
11
  self.reset()
 
126
  # Test the agent every test_every episodes with the greedy policy (by default)
127
  if e % test_every == 0:
128
  test_success_rate = self.test(verbose=False, **kwargs)
 
 
 
 
 
 
129
  if log_wandb:
130
  self.wandb_log_img(episode=e)
131
 
 
139
  if log_wandb:
140
  wandb.log(stats)
141
 
142
+ if test_running_success_rate > 0.999:
143
+ print(
144
+ f"CONVERGED: test success rate running avg reached 100% after {e} episodes."
145
+ )
146
+ if save_best:
147
+ if self.run_name is None:
148
+ print("WARNING: run_name is None, not saving best policy.")
149
+ else:
150
+ self.save_policy(self.run_name, save_best_dir)
151
+ break
152
 
153
  def wandb_log_img(self, episode=None):
154
  caption_suffix = "Initial" if episode is None else f"After Episode {episode}"
 
164
  ),
165
  }
166
  )
 
README.md CHANGED
@@ -75,10 +75,10 @@ Parameter testing results:
75
 
76
  ```bash
77
  # Training: Policy will be saved as a `.npy` file.
78
- python3 MonteCarloAgent.py --train
79
 
80
  # Testing: Use the `--test` flag with the path to the policy file.
81
- python3 MonteCarloAgent.py --test policy_mc_CliffWalking-v0_e2000_s500_g0.99_e0.1.npy --render_mode human
82
  ```
83
 
84
  **MC Usage**
 
75
 
76
  ```bash
77
  # Training: Policy will be saved as a `.npy` file.
78
+ python3 run.py --agent "MCAgent" --train
79
 
80
  # Testing: Use the `--test` flag with the path to the policy file.
81
+ python3 run.py --agent "MCAgent" --test "policies/MCAgent_CliffWalking-v0_e2500_s200_g1.0_e0.4_first_visit.npy" --render_mode human
82
  ```
83
 
84
  **MC Usage**
Shared.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import numpy as np
3
  import gymnasium as gym
 
4
 
5
 
6
  class Shared:
@@ -11,6 +12,7 @@ class Shared:
11
  gamma=0.99,
12
  epsilon=0.1,
13
  run_name=None,
 
14
  **kwargs,
15
  ):
16
  print("=" * 80)
@@ -27,16 +29,17 @@ class Shared:
27
  if self.env_name == "FrozenLake-v1":
28
  # Can use defaults by defining map_name (4x4 or 8x8) or custom map by defining desc
29
  # self.env_kwargs["map_name"] = "8x8"
30
- self.env_kwargs["desc"] = [
31
- "SFFFFFFF",
32
- "FFFFFFFH",
33
- "FFFHFFFF",
34
- "FFFFFHFF",
35
- "FFFHFFFF",
36
- "FHHFFFHF",
37
- "FHFFHFHF",
38
- "FFFHFFFG",
39
- ]
 
40
  self.env_kwargs["is_slippery"] = False
41
 
42
  self.env = gym.make(self.env_name, **self.env_kwargs)
 
1
  import os
2
  import numpy as np
3
  import gymnasium as gym
4
+ from gymnasium.envs.toy_text.frozen_lake import generate_random_map
5
 
6
 
7
  class Shared:
 
12
  gamma=0.99,
13
  epsilon=0.1,
14
  run_name=None,
15
+ frozenlake_size=8,
16
  **kwargs,
17
  ):
18
  print("=" * 80)
 
29
  if self.env_name == "FrozenLake-v1":
30
  # Can use defaults by defining map_name (4x4 or 8x8) or custom map by defining desc
31
  # self.env_kwargs["map_name"] = "8x8"
32
+ # self.env_kwargs["desc"] = [
33
+ # "SFFFFFFF",
34
+ # "FFFFFFFH",
35
+ # "FFFHFFFF",
36
+ # "FFFFFHFF",
37
+ # "FFFHFFFF",
38
+ # "FHHFFFHF",
39
+ # "FHFFHFHF",
40
+ # "FFFHFFFG",
41
+ # ]
42
+ self.env_kwargs["desc"] = generate_random_map(size=frozenlake_size)
43
  self.env_kwargs["is_slippery"] = False
44
 
45
  self.env = gym.make(self.env_name, **self.env_kwargs)
run.py CHANGED
@@ -3,6 +3,7 @@ import wandb
3
 
4
  from agents import AGENTS_MAP
5
 
 
6
  def main():
7
  parser = argparse.ArgumentParser()
8
  ### Train/Test parameters
@@ -72,7 +73,7 @@ def main():
72
  choices=AGENTS_MAP.keys(),
73
  help=f"The agent to use. One of: {AGENTS_MAP.keys()}",
74
  )
75
-
76
  parser.add_argument(
77
  "--gamma",
78
  type=float,
@@ -94,7 +95,7 @@ def main():
94
  choices=["CliffWalking-v0", "FrozenLake-v1", "Taxi-v3"],
95
  help="The Gymnasium environment to use. (default: CliffWalking-v0)",
96
  )
97
-
98
  parser.add_argument(
99
  "--render_mode",
100
  type=str,
 
3
 
4
  from agents import AGENTS_MAP
5
 
6
+
7
  def main():
8
  parser = argparse.ArgumentParser()
9
  ### Train/Test parameters
 
73
  choices=AGENTS_MAP.keys(),
74
  help=f"The agent to use. One of: {AGENTS_MAP.keys()}",
75
  )
76
+
77
  parser.add_argument(
78
  "--gamma",
79
  type=float,
 
95
  choices=["CliffWalking-v0", "FrozenLake-v1", "Taxi-v3"],
96
  help="The Gymnasium environment to use. (default: CliffWalking-v0)",
97
  )
98
+
99
  parser.add_argument(
100
  "--render_mode",
101
  type=str,