Spaces:
Sleeping
Sleeping
retrain frozenlake
Browse files
- DPAgent.py +1 -3
- MCAgent.py +2 -2
- Shared.py +3 -3
- policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy +0 -0
DPAgent.py
CHANGED
@@ -16,8 +16,6 @@ class DPAgent(Shared):
|
|
16 |
self.Pi = np.zeros(self.env.observation_space.n, self.env.action_space.n)
|
17 |
if self.gamma >= 1.0:
|
18 |
warnings.warn("DP will never converge with a gamma value =1.0. Try 0.99?", UserWarning)
|
19 |
-
print(self.env)
|
20 |
-
exit(1)
|
21 |
|
22 |
def policy(self, state):
|
23 |
return self.Pi[state]
|
@@ -68,7 +66,7 @@ class DPAgent(Shared):
|
|
68 |
|
69 |
if __name__ == "__main__":
|
70 |
# env = gym.make('FrozenLake-v1', render_mode='human')
|
71 |
-
dp = DPAgent(
|
72 |
dp.train()
|
73 |
dp.save_policy('dp_policy.npy')
|
74 |
env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=False, desc=[
|
|
|
16 |
self.Pi = np.zeros(self.env.observation_space.n, self.env.action_space.n)
|
17 |
if self.gamma >= 1.0:
|
18 |
warnings.warn("DP will never converge with a gamma value =1.0. Try 0.99?", UserWarning)
|
|
|
|
|
19 |
|
20 |
def policy(self, state):
|
21 |
return self.Pi[state]
|
|
|
66 |
|
67 |
if __name__ == "__main__":
|
68 |
# env = gym.make('FrozenLake-v1', render_mode='human')
|
69 |
+
dp = DPAgent(env="FrozenLake-v1", gamma=0.99)
|
70 |
dp.train()
|
71 |
dp.save_policy('dp_policy.npy')
|
72 |
env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=False, desc=[
|
MCAgent.py
CHANGED
@@ -9,13 +9,13 @@ class MCAgent(Shared):
|
|
9 |
def __init__(
|
10 |
self,
|
11 |
/,
|
12 |
-
|
13 |
gamma=0.99,
|
14 |
epsilon=0.1,
|
15 |
run_name=None,
|
16 |
**kwargs,
|
17 |
):
|
18 |
-
super().__init__(
|
19 |
self.reset()
|
20 |
|
21 |
def reset(self):
|
|
|
9 |
def __init__(
|
10 |
self,
|
11 |
/,
|
12 |
+
env="CliffWalking-v0",
|
13 |
gamma=0.99,
|
14 |
epsilon=0.1,
|
15 |
run_name=None,
|
16 |
**kwargs,
|
17 |
):
|
18 |
+
super().__init__(env, gamma, epsilon, run_name, **kwargs)
|
19 |
self.reset()
|
20 |
|
21 |
def reset(self):
|
Shared.py
CHANGED
@@ -6,19 +6,19 @@ class Shared:
|
|
6 |
|
7 |
def __init__(
|
8 |
self,/,
|
9 |
-
|
10 |
gamma=0.99,
|
11 |
epsilon=0.1,
|
12 |
run_name=None,
|
13 |
**kwargs,
|
14 |
):
|
15 |
print("=" * 80)
|
16 |
-
print(f"# Init Agent - {
|
17 |
print(f"- epsilon: {epsilon}")
|
18 |
print(f"- gamma: {gamma}")
|
19 |
print(f"- run_name: {run_name}")
|
20 |
self.run_name = run_name
|
21 |
-
self.env_name =
|
22 |
self.epsilon, self.gamma = epsilon, gamma
|
23 |
|
24 |
self.env_kwargs = {k:v for k,v in kwargs.items() if k in ['render_mode']}
|
|
|
6 |
|
7 |
def __init__(
|
8 |
self,/,
|
9 |
+
env="CliffWalking-v0",
|
10 |
gamma=0.99,
|
11 |
epsilon=0.1,
|
12 |
run_name=None,
|
13 |
**kwargs,
|
14 |
):
|
15 |
print("=" * 80)
|
16 |
+
print(f"# Init Agent - {env}")
|
17 |
print(f"- epsilon: {epsilon}")
|
18 |
print(f"- gamma: {gamma}")
|
19 |
print(f"- run_name: {run_name}")
|
20 |
self.run_name = run_name
|
21 |
+
self.env_name = env
|
22 |
self.epsilon, self.gamma = epsilon, gamma
|
23 |
|
24 |
self.env_kwargs = {k:v for k,v in kwargs.items() if k in ['render_mode']}
|
policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy
CHANGED
Binary files a/policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy and b/policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy differ
|
|