Andrei Cozma committed on
Commit
879176c
·
1 Parent(s): cb18290
Files changed (1) hide show
  1. MonteCarloAgent.py +2 -2
MonteCarloAgent.py CHANGED
@@ -54,13 +54,13 @@ class MonteCarloAgent:
54
  # Sample an action from the policy
55
  action = self.choose_action(state)
56
  # Take the action and observe the reward and next state
57
- next_state, reward, finished, _, _ = self.env.step(action)
58
  # Keeping track of the trajectory
59
  episode_hist.append((state, action, reward))
60
  state = next_state
61
  # This is where the agent got to the goal.
62
  # In the case in which agent jumped off the cliff, it is simply respawned at the start position without termination.
63
- if finished:
64
  break
65
 
66
  return episode_hist, finished
 
54
  # Sample an action from the policy
55
  action = self.choose_action(state)
56
  # Take the action and observe the reward and next state
57
+ next_state, reward, finished, truncated, _ = self.env.step(action)
58
  # Keeping track of the trajectory
59
  episode_hist.append((state, action, reward))
60
  state = next_state
61
  # This is where the agent got to the goal.
62
  # In the case in which agent jumped off the cliff, it is simply respawned at the start position without termination.
63
+ if finished or truncated:
64
  break
65
 
66
  return episode_hist, finished