Spaces:
Sleeping
Sleeping
Andrei Cozma
committed on
Commit
·
99ac186
1
Parent(s):
f6cf5f2
Updates
Browse files
MCAgent.py
CHANGED
@@ -4,8 +4,8 @@ from Shared import Shared
|
|
4 |
import wandb
|
5 |
from Shared import Shared
|
6 |
|
|
|
7 |
class MCAgent(Shared):
|
8 |
-
|
9 |
def __init__(self, /, **kwargs):
|
10 |
super().__init__(**kwargs)
|
11 |
self.reset()
|
@@ -126,12 +126,6 @@ class MCAgent(Shared):
|
|
126 |
# Test the agent every test_every episodes with the greedy policy (by default)
|
127 |
if e % test_every == 0:
|
128 |
test_success_rate = self.test(verbose=False, **kwargs)
|
129 |
-
if save_best and test_success_rate > 0.9:
|
130 |
-
if self.run_name is None:
|
131 |
-
print(f"Warning: run_name is None, not saving best policy")
|
132 |
-
else:
|
133 |
-
self.save_policy(self.run_name, save_best_dir)
|
134 |
-
|
135 |
if log_wandb:
|
136 |
self.wandb_log_img(episode=e)
|
137 |
|
@@ -145,6 +139,16 @@ class MCAgent(Shared):
|
|
145 |
if log_wandb:
|
146 |
wandb.log(stats)
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
def wandb_log_img(self, episode=None):
|
150 |
caption_suffix = "Initial" if episode is None else f"After Episode {episode}"
|
@@ -160,4 +164,3 @@ class MCAgent(Shared):
|
|
160 |
),
|
161 |
}
|
162 |
)
|
163 |
-
|
|
|
4 |
import wandb
|
5 |
from Shared import Shared
|
6 |
|
7 |
+
|
8 |
class MCAgent(Shared):
|
|
|
9 |
def __init__(self, /, **kwargs):
|
10 |
super().__init__(**kwargs)
|
11 |
self.reset()
|
|
|
126 |
# Test the agent every test_every episodes with the greedy policy (by default)
|
127 |
if e % test_every == 0:
|
128 |
test_success_rate = self.test(verbose=False, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
if log_wandb:
|
130 |
self.wandb_log_img(episode=e)
|
131 |
|
|
|
139 |
if log_wandb:
|
140 |
wandb.log(stats)
|
141 |
|
142 |
+
if test_running_success_rate > 0.999:
|
143 |
+
print(
|
144 |
+
f"CONVERGED: test success rate running avg reached 100% after {e} episodes."
|
145 |
+
)
|
146 |
+
if save_best:
|
147 |
+
if self.run_name is None:
|
148 |
+
print("WARNING: run_name is None, not saving best policy.")
|
149 |
+
else:
|
150 |
+
self.save_policy(self.run_name, save_best_dir)
|
151 |
+
break
|
152 |
|
153 |
def wandb_log_img(self, episode=None):
|
154 |
caption_suffix = "Initial" if episode is None else f"After Episode {episode}"
|
|
|
164 |
),
|
165 |
}
|
166 |
)
|
|
README.md
CHANGED
@@ -75,10 +75,10 @@ Parameter testing results:
|
|
75 |
|
76 |
```bash
|
77 |
# Training: Policy will be saved as a `.npy` file.
|
78 |
-
python3
|
79 |
|
80 |
# Testing: Use the `--test` flag with the path to the policy file.
|
81 |
-
python3
|
82 |
```
|
83 |
|
84 |
**MC Usage**
|
|
|
75 |
|
76 |
```bash
|
77 |
# Training: Policy will be saved as a `.npy` file.
|
78 |
+
python3 run.py --agent "MCAgent" --train
|
79 |
|
80 |
# Testing: Use the `--test` flag with the path to the policy file.
|
81 |
+
python3 run.py --agent "MCAgent" --test "policies/MCAgent_CliffWalking-v0_e2500_s200_g1.0_e0.4_first_visit.npy" --render_mode human
|
82 |
```
|
83 |
|
84 |
**MC Usage**
|
Shared.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import numpy as np
|
3 |
import gymnasium as gym
|
|
|
4 |
|
5 |
|
6 |
class Shared:
|
@@ -11,6 +12,7 @@ class Shared:
|
|
11 |
gamma=0.99,
|
12 |
epsilon=0.1,
|
13 |
run_name=None,
|
|
|
14 |
**kwargs,
|
15 |
):
|
16 |
print("=" * 80)
|
@@ -27,16 +29,17 @@ class Shared:
|
|
27 |
if self.env_name == "FrozenLake-v1":
|
28 |
# Can use defaults by defining map_name (4x4 or 8x8) or custom map by defining desc
|
29 |
# self.env_kwargs["map_name"] = "8x8"
|
30 |
-
self.env_kwargs["desc"] = [
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
]
|
|
|
40 |
self.env_kwargs["is_slippery"] = False
|
41 |
|
42 |
self.env = gym.make(self.env_name, **self.env_kwargs)
|
|
|
1 |
import os
|
2 |
import numpy as np
|
3 |
import gymnasium as gym
|
4 |
+
from gymnasium.envs.toy_text.frozen_lake import generate_random_map
|
5 |
|
6 |
|
7 |
class Shared:
|
|
|
12 |
gamma=0.99,
|
13 |
epsilon=0.1,
|
14 |
run_name=None,
|
15 |
+
frozenlake_size=8,
|
16 |
**kwargs,
|
17 |
):
|
18 |
print("=" * 80)
|
|
|
29 |
if self.env_name == "FrozenLake-v1":
|
30 |
# Can use defaults by defining map_name (4x4 or 8x8) or custom map by defining desc
|
31 |
# self.env_kwargs["map_name"] = "8x8"
|
32 |
+
# self.env_kwargs["desc"] = [
|
33 |
+
# "SFFFFFFF",
|
34 |
+
# "FFFFFFFH",
|
35 |
+
# "FFFHFFFF",
|
36 |
+
# "FFFFFHFF",
|
37 |
+
# "FFFHFFFF",
|
38 |
+
# "FHHFFFHF",
|
39 |
+
# "FHFFHFHF",
|
40 |
+
# "FFFHFFFG",
|
41 |
+
# ]
|
42 |
+
self.env_kwargs["desc"] = generate_random_map(size=frozenlake_size)
|
43 |
self.env_kwargs["is_slippery"] = False
|
44 |
|
45 |
self.env = gym.make(self.env_name, **self.env_kwargs)
|
run.py
CHANGED
@@ -3,6 +3,7 @@ import wandb
|
|
3 |
|
4 |
from agents import AGENTS_MAP
|
5 |
|
|
|
6 |
def main():
|
7 |
parser = argparse.ArgumentParser()
|
8 |
### Train/Test parameters
|
@@ -72,7 +73,7 @@ def main():
|
|
72 |
choices=AGENTS_MAP.keys(),
|
73 |
help=f"The agent to use. One of: {AGENTS_MAP.keys()}",
|
74 |
)
|
75 |
-
|
76 |
parser.add_argument(
|
77 |
"--gamma",
|
78 |
type=float,
|
@@ -94,7 +95,7 @@ def main():
|
|
94 |
choices=["CliffWalking-v0", "FrozenLake-v1", "Taxi-v3"],
|
95 |
help="The Gymnasium environment to use. (default: CliffWalking-v0)",
|
96 |
)
|
97 |
-
|
98 |
parser.add_argument(
|
99 |
"--render_mode",
|
100 |
type=str,
|
|
|
3 |
|
4 |
from agents import AGENTS_MAP
|
5 |
|
6 |
+
|
7 |
def main():
|
8 |
parser = argparse.ArgumentParser()
|
9 |
### Train/Test parameters
|
|
|
73 |
choices=AGENTS_MAP.keys(),
|
74 |
help=f"The agent to use. One of: {AGENTS_MAP.keys()}",
|
75 |
)
|
76 |
+
|
77 |
parser.add_argument(
|
78 |
"--gamma",
|
79 |
type=float,
|
|
|
95 |
choices=["CliffWalking-v0", "FrozenLake-v1", "Taxi-v3"],
|
96 |
help="The Gymnasium environment to use. (default: CliffWalking-v0)",
|
97 |
)
|
98 |
+
|
99 |
parser.add_argument(
|
100 |
"--render_mode",
|
101 |
type=str,
|