small cleanup
- DPAgent.py +5 -6
- dp_policy.npy +0 -0
DPAgent.py
CHANGED
@@ -36,7 +36,7 @@ class DPAgent(Shared):
                     for probability, next_state, reward, done in self.env.P[state][
                         action
                     ]:
-
+                        if state == self.env.observation_space.n-1: reward = 1
                         expected_value += probability * (
                             reward + self.gamma * self.V[next_state]
                         )
@@ -49,17 +49,16 @@
             if delta < self.theta:
                 break
             i += 1
-            # if i %
-            # self.test()
+            # if i % 5 == 0 and i != 0:
+            # self.test(verbose=False)
             print(f"Iteration {i}: delta={delta}")
-
-        # policy = [self.policy(state, return_value=True)[0] for state in range(self.env.observation_space.n)]
+
         self.Pi = np.empty((self.env.observation_space.n, self.env.action_space.n))
         for s in range(self.env.observation_space.n):
             for a in range(self.env.action_space.n):
                 expected_value = 0
                 for probability, next_state, reward, done in self.env.P[s][a]:
-
+                    if state == self.env.observation_space.n-1: reward = 1
                     expected_value += probability * (
                         reward + self.gamma * self.V[next_state]
                     )
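For context, the inner loops touched by both hunks implement the standard one-step Bellman backup over the gym-style transition table self.env.P[s][a], whose entries are (probability, next_state, reward, done) tuples; the added line simply forces reward = 1 for the last state index, an environment-specific tweak. The sketch below shows how that backup and the greedy policy extraction from the second hunk fit together in plain value iteration. It is a minimal illustration, not the Space's actual DPAgent code: the value_iteration function and the env/gamma/theta names are assumptions, and the reward override is only referenced in a comment.

import numpy as np

def value_iteration(env, gamma=0.99, theta=1e-8):
    # Sweep V until the largest per-state change (delta) drops below theta.
    n_states = env.observation_space.n
    n_actions = env.action_space.n
    V = np.zeros(n_states)
    while True:
        delta = 0.0
        for s in range(n_states):
            v_old = V[s]
            q = np.zeros(n_actions)
            for a in range(n_actions):
                for probability, next_state, reward, done in env.P[s][a]:
                    # The commit additionally sets reward = 1 when the state is
                    # the last index; that override is env-specific and omitted here.
                    q[a] += probability * (reward + gamma * V[next_state])
            V[s] = q.max()
            delta = max(delta, abs(v_old - V[s]))
        if delta < theta:
            break
    # Greedy policy extraction, mirroring the second hunk: one row per state,
    # all probability mass on the best action.
    Pi = np.zeros((n_states, n_actions))
    for s in range(n_states):
        q = np.zeros(n_actions)
        for a in range(n_actions):
            for probability, next_state, reward, done in env.P[s][a]:
                q[a] += probability * (reward + gamma * V[next_state])
        Pi[s, np.argmax(q)] = 1.0
    return V, Pi

Run against a tabular env that exposes P (e.g. gym's FrozenLake-v1), this returns a converged value table V and a deterministic policy matrix Pi, the counterparts of the diff's self.V and self.Pi.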
dp_policy.npy
DELETED
Binary file (2.18 kB)
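dp_policy.npy looks like a cached policy array that becomes redundant once the policy matrix is rebuilt at the end of training, which is presumably why it was deleted; the diff itself does not say. If a cached copy were still wanted, a NumPy array like self.Pi is normally persisted and restored with np.save / np.load (standard NumPy API; the filename below just mirrors the deleted file, and Pi is an assumed stand-in for the agent's policy array):

import numpy as np
np.save("dp_policy.npy", Pi)      # write the binary .npy cache
Pi = np.load("dp_policy.npy")     # read it back as an ndarray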