Spaces:
Sleeping
Sleeping
Andrei Cozma
committed on
Commit
·
9c2fd5e
1
Parent(s):
e24c7c0
Updates
Browse files
demo.py
CHANGED
@@ -99,7 +99,14 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
99 |
env_action_map = action_map.get(env_name)
|
100 |
|
101 |
solved, rgb_array, policy_viz = None, None, None
|
102 |
-
episode, step, state, action, reward
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
episodes_solved = 0
|
104 |
|
105 |
def ep_str(episode):
|
@@ -124,6 +131,9 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
124 |
live_steps_forward = None
|
125 |
live_paused = True
|
126 |
|
|
|
|
|
|
|
127 |
state, action, reward = episode_hist[-1]
|
128 |
curr_policy = agent.Pi[state]
|
129 |
|
@@ -189,7 +199,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
189 |
episode + 1
|
190 |
), ep_str(episodes_solved), step_str(
|
191 |
step
|
192 |
-
), state, action,
|
193 |
|
194 |
time.sleep(1 / live_render_fps)
|
195 |
|
@@ -198,7 +208,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
|
|
198 |
episode + 1
|
199 |
), ep_str(episodes_solved), step_str(
|
200 |
step
|
201 |
-
), state, action,
|
202 |
time.sleep(1 / live_render_fps)
|
203 |
|
204 |
if solved:
|
@@ -253,11 +263,11 @@ with gr.Blocks(title="CS581 Demo") as demo:
|
|
253 |
with gr.Row():
|
254 |
out_state = gr.components.Textbox(label="Current State")
|
255 |
out_action = gr.components.Textbox(label="Chosen Action")
|
256 |
-
out_reward = gr.components.Textbox(label="Reward
|
257 |
|
258 |
out_image_policy = gr.components.Image(
|
259 |
value=np.ones((16, 128)),
|
260 |
-
label="
|
261 |
type="numpy",
|
262 |
image_mode="RGB",
|
263 |
)
|
|
|
99 |
env_action_map = action_map.get(env_name)
|
100 |
|
101 |
solved, rgb_array, policy_viz = None, None, None
|
102 |
+
episode, step, state, action, reward, last_reward = (
|
103 |
+
None,
|
104 |
+
None,
|
105 |
+
None,
|
106 |
+
None,
|
107 |
+
None,
|
108 |
+
None,
|
109 |
+
)
|
110 |
episodes_solved = 0
|
111 |
|
112 |
def ep_str(episode):
|
|
|
131 |
live_steps_forward = None
|
132 |
live_paused = True
|
133 |
|
134 |
+
_, _, last_reward = (
|
135 |
+
episode_hist[-2] if len(episode_hist) > 1 else (None, None, None)
|
136 |
+
)
|
137 |
state, action, reward = episode_hist[-1]
|
138 |
curr_policy = agent.Pi[state]
|
139 |
|
|
|
199 |
episode + 1
|
200 |
), ep_str(episodes_solved), step_str(
|
201 |
step
|
202 |
+
), state, action, last_reward, "Running..."
|
203 |
|
204 |
time.sleep(1 / live_render_fps)
|
205 |
|
|
|
208 |
episode + 1
|
209 |
), ep_str(episodes_solved), step_str(
|
210 |
step
|
211 |
+
), state, action, last_reward, "Paused..."
|
212 |
time.sleep(1 / live_render_fps)
|
213 |
|
214 |
if solved:
|
|
|
263 |
with gr.Row():
|
264 |
out_state = gr.components.Textbox(label="Current State")
|
265 |
out_action = gr.components.Textbox(label="Chosen Action")
|
266 |
+
out_reward = gr.components.Textbox(label="Last Reward")
|
267 |
|
268 |
out_image_policy = gr.components.Image(
|
269 |
value=np.ones((16, 128)),
|
270 |
+
label="Action Sampled vs Policy Distribution for Current State",
|
271 |
type="numpy",
|
272 |
image_mode="RGB",
|
273 |
)
|