lharri73 committed on
Commit
3266489
·
1 Parent(s): 1af9e6d

fix reward func

Browse files
Files changed (1) hide show
  1. DPAgent.py +4 -2
DPAgent.py CHANGED
@@ -36,7 +36,8 @@ class DPAgent(Shared):
36
  for probability, next_state, reward, done in self.env.P[state][
37
  action
38
  ]:
39
- if state == self.env.observation_space.n-1: reward = 1
 
40
  expected_value += probability * (
41
  reward + self.gamma * self.V[next_state]
42
  )
@@ -58,7 +59,8 @@ class DPAgent(Shared):
58
  for a in range(self.env.action_space.n):
59
  expected_value = 0
60
  for probability, next_state, reward, done in self.env.P[s][a]:
61
- if state == self.env.observation_space.n-1: reward = 1
 
62
  expected_value += probability * (
63
  reward + self.gamma * self.V[next_state]
64
  )
 
36
  for probability, next_state, reward, done in self.env.P[state][
37
  action
38
  ]:
39
+ if self.env_name == "CliffWalking-v0" and state == self.env.observation_space.n-1:
40
+ reward = 1
41
  expected_value += probability * (
42
  reward + self.gamma * self.V[next_state]
43
  )
 
59
  for a in range(self.env.action_space.n):
60
  expected_value = 0
61
  for probability, next_state, reward, done in self.env.P[s][a]:
62
+ if self.env_name == "CliffWalking-v0" and state == self.env.observation_space.n-1:
63
+ reward = 1
64
  expected_value += probability * (
65
  reward + self.gamma * self.V[next_state]
66
  )