Andrei Cozma committed on
Commit
de8a156
·
1 Parent(s): 69d9811
Files changed (1) hide show
  1. demo.py +13 -10
demo.py CHANGED
@@ -88,6 +88,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
88
 
89
  if solved:
90
  episodes_solved += 1
 
91
  state, action, reward = episode_hist[-1]
92
  curr_policy = agent.Pi[state]
93
 
@@ -102,7 +103,13 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
102
  * (viz_w // len(curr_policy)),
103
  ] = p
104
 
105
- policy_viz = np.stack([policy_viz] * 3, axis=-1)
 
 
 
 
 
 
106
  text_offset = 15
107
  cv2.putText(
108
  policy_viz,
@@ -113,27 +120,23 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
113
  ),
114
  cv2.FONT_HERSHEY_SIMPLEX,
115
  1.5,
116
- (255, 255, 255),
117
- 1,
118
  cv2.LINE_AA,
119
  )
120
 
121
- policy_viz = scipy.ndimage.gaussian_filter(policy_viz, sigma=1)
122
- policy_viz = np.clip(
123
- policy_viz * (1 - live_epsilon) + live_epsilon / len(curr_policy), 0, 1
124
- )
125
-
126
  print(
127
- f"Episode: {ep_str(episode)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (frame time: {1 / render_fps:.2f}s)"
128
  )
129
 
130
- time.sleep(1 / live_render_fps)
131
  # Live-update the agent's epsilon value for demonstration purposes
132
  agent.epsilon = live_epsilon
133
  yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
134
  episodes_solved
135
  ), step_str(step), state, action, reward, "Running..."
136
 
 
 
137
  yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
138
  episodes_solved
139
  ), step_str(step), state, action, reward, "Done!"
 
88
 
89
  if solved:
90
  episodes_solved += 1
91
+
92
  state, action, reward = episode_hist[-1]
93
  curr_policy = agent.Pi[state]
94
 
 
103
  * (viz_w // len(curr_policy)),
104
  ] = p
105
 
106
+ policy_viz = scipy.ndimage.gaussian_filter(policy_viz, sigma=1.0)
107
+ policy_viz = np.clip(
108
+ policy_viz * (1.0 - live_epsilon) + live_epsilon / len(curr_policy),
109
+ 0.0,
110
+ 1.0,
111
+ )
112
+
113
  text_offset = 15
114
  cv2.putText(
115
  policy_viz,
 
120
  ),
121
  cv2.FONT_HERSHEY_SIMPLEX,
122
  1.5,
123
+ 1.0,
124
+ 2,
125
  cv2.LINE_AA,
126
  )
127
 
 
 
 
 
 
128
  print(
129
+ f"Episode: {ep_str(episode)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (epsilon: {live_epsilon:.2f}) (frame time: {1 / render_fps:.2f}s)"
130
  )
131
 
 
132
  # Live-update the agent's epsilon value for demonstration purposes
133
  agent.epsilon = live_epsilon
134
  yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
135
  episodes_solved
136
  ), step_str(step), state, action, reward, "Running..."
137
 
138
+ time.sleep(1 / live_render_fps)
139
+
140
  yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
141
  episodes_solved
142
  ), step_str(step), state, action, reward, "Done!"