Andrei Cozma committed on
Commit
9c2fd5e
·
1 Parent(s): e24c7c0
Files changed (1) hide show
  1. demo.py +15 -5
demo.py CHANGED
@@ -99,7 +99,14 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
99
  env_action_map = action_map.get(env_name)
100
 
101
  solved, rgb_array, policy_viz = None, None, None
102
- episode, step, state, action, reward = 0, 0, 0, 0, 0
 
 
 
 
 
 
 
103
  episodes_solved = 0
104
 
105
  def ep_str(episode):
@@ -124,6 +131,9 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
124
  live_steps_forward = None
125
  live_paused = True
126
 
 
 
 
127
  state, action, reward = episode_hist[-1]
128
  curr_policy = agent.Pi[state]
129
 
@@ -189,7 +199,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
189
  episode + 1
190
  ), ep_str(episodes_solved), step_str(
191
  step
192
- ), state, action, reward, "Running..."
193
 
194
  time.sleep(1 / live_render_fps)
195
 
@@ -198,7 +208,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
198
  episode + 1
199
  ), ep_str(episodes_solved), step_str(
200
  step
201
- ), state, action, reward, "Paused..."
202
  time.sleep(1 / live_render_fps)
203
 
204
  if solved:
@@ -253,11 +263,11 @@ with gr.Blocks(title="CS581 Demo") as demo:
253
  with gr.Row():
254
  out_state = gr.components.Textbox(label="Current State")
255
  out_action = gr.components.Textbox(label="Chosen Action")
256
- out_reward = gr.components.Textbox(label="Reward Received")
257
 
258
  out_image_policy = gr.components.Image(
259
  value=np.ones((16, 128)),
260
- label="policy[state]",
261
  type="numpy",
262
  image_mode="RGB",
263
  )
 
99
  env_action_map = action_map.get(env_name)
100
 
101
  solved, rgb_array, policy_viz = None, None, None
102
+ episode, step, state, action, reward, last_reward = (
103
+ None,
104
+ None,
105
+ None,
106
+ None,
107
+ None,
108
+ None,
109
+ )
110
  episodes_solved = 0
111
 
112
  def ep_str(episode):
 
131
  live_steps_forward = None
132
  live_paused = True
133
 
134
+ _, _, last_reward = (
135
+ episode_hist[-2] if len(episode_hist) > 1 else (None, None, None)
136
+ )
137
  state, action, reward = episode_hist[-1]
138
  curr_policy = agent.Pi[state]
139
 
 
199
  episode + 1
200
  ), ep_str(episodes_solved), step_str(
201
  step
202
+ ), state, action, last_reward, "Running..."
203
 
204
  time.sleep(1 / live_render_fps)
205
 
 
208
  episode + 1
209
  ), ep_str(episodes_solved), step_str(
210
  step
211
+ ), state, action, last_reward, "Paused..."
212
  time.sleep(1 / live_render_fps)
213
 
214
  if solved:
 
263
  with gr.Row():
264
  out_state = gr.components.Textbox(label="Current State")
265
  out_action = gr.components.Textbox(label="Chosen Action")
266
+ out_reward = gr.components.Textbox(label="Last Reward")
267
 
268
  out_image_policy = gr.components.Image(
269
  value=np.ones((16, 128)),
270
+ label="Action Sampled vs Policy Distribution for Current State",
271
  type="numpy",
272
  image_mode="RGB",
273
  )