lharri73 committed on
Commit
1af9e6d
·
1 Parent(s): 39f7a11

small cleanup

Browse files
Files changed (2) hide show
  1. DPAgent.py +5 -6
  2. dp_policy.npy +0 -0
DPAgent.py CHANGED
@@ -36,7 +36,7 @@ class DPAgent(Shared):
36
  for probability, next_state, reward, done in self.env.P[state][
37
  action
38
  ]:
39
- # if state == self.env.observation_space.n-1: reward = 1
40
  expected_value += probability * (
41
  reward + self.gamma * self.V[next_state]
42
  )
@@ -49,17 +49,16 @@ class DPAgent(Shared):
49
  if delta < self.theta:
50
  break
51
  i += 1
52
- # if i % 100 == 0 and i != 0:
53
- # self.test()
54
  print(f"Iteration {i}: delta={delta}")
55
- # break
56
- # policy = [self.policy(state, return_value=True)[0] for state in range(self.env.observation_space.n)]
57
  self.Pi = np.empty((self.env.observation_space.n, self.env.action_space.n))
58
  for s in range(self.env.observation_space.n):
59
  for a in range(self.env.action_space.n):
60
  expected_value = 0
61
  for probability, next_state, reward, done in self.env.P[s][a]:
62
- # if state == self.env.observation_space.n-1: reward = 1
63
  expected_value += probability * (
64
  reward + self.gamma * self.V[next_state]
65
  )
 
36
  for probability, next_state, reward, done in self.env.P[state][
37
  action
38
  ]:
39
+ if state == self.env.observation_space.n-1: reward = 1
40
  expected_value += probability * (
41
  reward + self.gamma * self.V[next_state]
42
  )
 
49
  if delta < self.theta:
50
  break
51
  i += 1
52
+ # if i % 5 == 0 and i != 0:
53
+ # self.test(verbose=False)
54
  print(f"Iteration {i}: delta={delta}")
55
+
 
56
  self.Pi = np.empty((self.env.observation_space.n, self.env.action_space.n))
57
  for s in range(self.env.observation_space.n):
58
  for a in range(self.env.action_space.n):
59
  expected_value = 0
60
  for probability, next_state, reward, done in self.env.P[s][a]:
61
+ if state == self.env.observation_space.n-1: reward = 1
62
  expected_value += probability * (
63
  reward + self.gamma * self.V[next_state]
64
  )
dp_policy.npy DELETED
Binary file (2.18 kB)