Spaces:
Sleeping
Sleeping
Andrei Cozma
committed on
Commit
·
de8a156
1
Parent(s):
69d9811
Updates
Browse files
demo.py
CHANGED
@@ -88,6 +88,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
88 |
|
89 |
if solved:
|
90 |
episodes_solved += 1
|
|
|
91 |
state, action, reward = episode_hist[-1]
|
92 |
curr_policy = agent.Pi[state]
|
93 |
|
@@ -102,7 +103,13 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
102 |
* (viz_w // len(curr_policy)),
|
103 |
] = p
|
104 |
|
105 |
-
policy_viz =
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
text_offset = 15
|
107 |
cv2.putText(
|
108 |
policy_viz,
|
@@ -113,27 +120,23 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
113 |
),
|
114 |
cv2.FONT_HERSHEY_SIMPLEX,
|
115 |
1.5,
|
116 |
-
|
117 |
-
|
118 |
cv2.LINE_AA,
|
119 |
)
|
120 |
|
121 |
-
policy_viz = scipy.ndimage.gaussian_filter(policy_viz, sigma=1)
|
122 |
-
policy_viz = np.clip(
|
123 |
-
policy_viz * (1 - live_epsilon) + live_epsilon / len(curr_policy), 0, 1
|
124 |
-
)
|
125 |
-
|
126 |
print(
|
127 |
-
f"Episode: {ep_str(episode)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (frame time: {1 / render_fps:.2f}s)"
|
128 |
)
|
129 |
|
130 |
-
time.sleep(1 / live_render_fps)
|
131 |
# Live-update the agent's epsilon value for demonstration purposes
|
132 |
agent.epsilon = live_epsilon
|
133 |
yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
|
134 |
episodes_solved
|
135 |
), step_str(step), state, action, reward, "Running..."
|
136 |
|
|
|
|
|
137 |
yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
|
138 |
episodes_solved
|
139 |
), step_str(step), state, action, reward, "Done!"
|
|
|
88 |
|
89 |
if solved:
|
90 |
episodes_solved += 1
|
91 |
+
|
92 |
state, action, reward = episode_hist[-1]
|
93 |
curr_policy = agent.Pi[state]
|
94 |
|
|
|
103 |
* (viz_w // len(curr_policy)),
|
104 |
] = p
|
105 |
|
106 |
+
policy_viz = scipy.ndimage.gaussian_filter(policy_viz, sigma=1.0)
|
107 |
+
policy_viz = np.clip(
|
108 |
+
policy_viz * (1.0 - live_epsilon) + live_epsilon / len(curr_policy),
|
109 |
+
0.0,
|
110 |
+
1.0,
|
111 |
+
)
|
112 |
+
|
113 |
text_offset = 15
|
114 |
cv2.putText(
|
115 |
policy_viz,
|
|
|
120 |
),
|
121 |
cv2.FONT_HERSHEY_SIMPLEX,
|
122 |
1.5,
|
123 |
+
1.0,
|
124 |
+
2,
|
125 |
cv2.LINE_AA,
|
126 |
)
|
127 |
|
|
|
|
|
|
|
|
|
|
|
128 |
print(
|
129 |
+
f"Episode: {ep_str(episode)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (epsilon: {live_epsilon:.2f}) (frame time: {1 / render_fps:.2f}s)"
|
130 |
)
|
131 |
|
|
|
132 |
# Live-update the agent's epsilon value for demonstration purposes
|
133 |
agent.epsilon = live_epsilon
|
134 |
yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
|
135 |
episodes_solved
|
136 |
), step_str(step), state, action, reward, "Running..."
|
137 |
|
138 |
+
time.sleep(1 / live_render_fps)
|
139 |
+
|
140 |
yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
|
141 |
episodes_solved
|
142 |
), step_str(step), state, action, reward, "Done!"
|