Andrei Cozma committed on
Commit
552dbe8
·
1 Parent(s): 8ae24a2
Files changed (1) hide show
  1. demo.py +1 -3
demo.py CHANGED
@@ -159,9 +159,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
159
  episode_hist[-2] if len(episode_hist) > 1 else (None, None, None)
160
  )
161
  state, action, reward = episode_hist[-1]
162
- curr_policy = agent.policy(state)
163
- curr_policy -= np.min(curr_policy)
164
- curr_policy = curr_policy / np.sum(curr_policy)
165
 
166
  frame_policy_h = frame_policy_res // len(curr_policy)
167
  frame_policy = np.zeros((frame_policy_h, frame_policy_res))
 
159
  episode_hist[-2] if len(episode_hist) > 1 else (None, None, None)
160
  )
161
  state, action, reward = episode_hist[-1]
162
+ curr_policy = agent.Pi[state]
 
 
163
 
164
  frame_policy_h = frame_policy_res // len(curr_policy)
165
  frame_policy = np.zeros((frame_policy_h, frame_policy_res))