Andrei Cozma committed on
Commit 6f3d0ef · 1 Parent(s): 6a793bc
Files changed (1): demo.py (+20, -20)
demo.py CHANGED
@@ -73,7 +73,6 @@ def change_epsilon(x):
 
 def change_paused(x):
     print("Changing paused:", x)
-
     global live_paused
     live_paused = pause_val_map[x]
     next_val = pause_val_map_inv[not live_paused]
@@ -134,7 +133,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
         return f"{step + 1}"
 
     for episode in range(n_test_episodes):
-        time.sleep(1.0)
+        time.sleep(0.5)
 
         for step, (episode_hist, solved, rgb_array) in enumerate(
             agent.generate_episode(
@@ -147,7 +146,7 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
             state, action, reward = episode_hist[-1]
             curr_policy = agent.Pi[state]
 
-            rgb_array_height, rgb_array_width = 150, 512
+            rgb_array_height, rgb_array_width = 384, 768
             rgb_array = cv2.resize(
                 rgb_array,
                 (
@@ -156,20 +155,21 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
                 ),
                 interpolation=cv2.INTER_AREA,
             )
-            rgb_array_new = np.pad(
-                rgb_array,
-                (
-                    (0, 0),
-                    (
-                        (rgb_array_width - rgb_array.shape[1]) // 2,
-                        (rgb_array_width - rgb_array.shape[1]) // 2,
-                    ),
-                    (0, 0),
-                ),
-                "constant",
-            )
-
-            rgb_array = np.uint8(rgb_array_new)
+
+            if rgb_array.shape[1] < rgb_array_width:
+                rgb_array_new = np.pad(
+                    rgb_array,
+                    (
+                        (0, 0),
+                        (
+                            (rgb_array_width - rgb_array.shape[1]) // 2,
+                            (rgb_array_width - rgb_array.shape[1]) // 2,
+                        ),
+                        (0, 0),
+                    ),
+                    "constant",
+                )
+                rgb_array = np.uint8(rgb_array_new)
 
             viz_w = 512
             viz_h = viz_w // len(curr_policy)
@@ -272,6 +272,8 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
         if solved:
             episodes_solved += 1
 
+        time.sleep(0.5)
+
     yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode + 1), ep_str(
         episodes_solved
     ), step_str(step), state, action, reward, "Done!"
@@ -279,9 +281,8 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
 
 with gr.Blocks(title="CS581 Demo") as demo:
     gr.components.HTML(
-        "<h1>Reinforcement Learning: From Dynamic Programming to Monte-Carlo (Demo)</h1>"
+        "<h1>CS581 Final Project Demo - Reinforcement Learning: From Dynamic Programming to Monte-Carlo</h1>"
     )
-    gr.components.HTML("<h3>Authors: Andrei Cozma and Landon Harris</h3>")
 
     gr.components.HTML("<h2>Select Configuration:</h2>")
     with gr.Row():
@@ -310,7 +311,7 @@ with gr.Blocks(title="CS581 Demo") as demo:
 
     btn_run = gr.components.Button("👀 Select", interactive=bool(all_policies))
 
-    gr.components.HTML("<h2>Live Statistics & Policy Visualization:</h2>")
+    gr.components.HTML("<h2>Live Visualization & Information:</h2>")
    with gr.Row():
        with gr.Column():
            with gr.Row():
@@ -330,7 +331,6 @@ with gr.Blocks(title="CS581 Demo") as demo:
                 image_mode="RGB",
             )
 
-    gr.components.HTML("<h2>Live Customization:</h2>")
     with gr.Row():
         input_epsilon = gr.components.Slider(
             minimum=0,
@@ -353,7 +353,7 @@ with gr.Blocks(title="CS581 Demo") as demo:
             btn_pause = gr.components.Button(
                 pause_val_map_inv[not live_paused], interactive=True
             )
-            btn_forward = gr.components.Button("⏩ Step", interactive=False)
+            btn_forward = gr.components.Button("⏩ Step")
 
     btn_pause.click(
         fn=change_paused,
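For context on the largest hunk above: the new code only pads when the resized frame comes out narrower than the target width. Below is a minimal, self-contained sketch of that resize-and-center-pad step, assuming uint8 RGB frames and the 384x768 target introduced in this commit; the resize arguments are not fully visible in the hunk, so the aspect-preserving scaling and the helper name fit_frame are assumptions.

# Sketch only: center a rendered frame on a fixed-width canvas (assumed 384x768 target).
import cv2
import numpy as np

TARGET_H, TARGET_W = 384, 768  # mirrors rgb_array_height, rgb_array_width in this commit

def fit_frame(rgb_array):
    # Scale so the height matches the target; the width follows the original aspect ratio
    # (assumption: the hidden cv2.resize arguments do something equivalent).
    scale = TARGET_H / rgb_array.shape[0]
    new_w = int(round(rgb_array.shape[1] * scale))
    rgb_array = cv2.resize(rgb_array, (new_w, TARGET_H), interpolation=cv2.INTER_AREA)

    # Pad left/right with black only when the frame is narrower than the target,
    # which is what the new `if rgb_array.shape[1] < rgb_array_width:` guard does.
    if rgb_array.shape[1] < TARGET_W:
        pad_total = TARGET_W - rgb_array.shape[1]
        left, right = pad_total // 2, pad_total - pad_total // 2
        rgb_array = np.pad(rgb_array, ((0, 0), (left, right), (0, 0)), "constant")
    return np.uint8(rgb_array)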
 
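The change_paused hunk also depends on two globals this diff does not show, pause_val_map and pause_val_map_inv. The sketch below is a hypothetical reconstruction of that label/state toggle; the actual button labels and map contents in demo.py may differ.

# Hypothetical sketch: button-label <-> paused-state mapping assumed by change_paused.
live_paused = False
pause_val_map = {"⏸️ Pause": True, "▶️ Resume": False}        # clicked label -> new paused state (labels assumed)
pause_val_map_inv = {v: k for k, v in pause_val_map.items()}  # paused state -> label that switches to it

def change_paused(x):
    print("Changing paused:", x)
    global live_paused
    live_paused = pause_val_map[x]
    # Show the label for the opposite action, i.e. the one that would switch
    # to the state we are not currently in.
    next_val = pause_val_map_inv[not live_paused]
    return next_val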
 
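More broadly, run() is a generator whose yielded tuples Gradio streams into the bound output components, which is why the time.sleep(0.5) calls added in this commit pace the live view. Below is a minimal sketch of that pattern, not the demo's exact wiring; run_sketch, n_steps, and step_box are hypothetical names.

# Sketch: a generator bound to a Button streams each yielded tuple into the outputs in order.
import time
import numpy as np
import gradio as gr

def run_sketch(n_steps):
    frame = np.zeros((64, 64, 3), dtype=np.uint8)  # placeholder RGB frame
    for step in range(int(n_steps)):
        time.sleep(0.5)  # pacing between updates, like the sleeps added in this commit
        yield frame, f"{step + 1}"

with gr.Blocks() as sketch:
    n_steps = gr.components.Slider(minimum=1, maximum=10, value=3, step=1, label="Steps")
    image = gr.components.Image(image_mode="RGB")
    step_box = gr.components.Textbox(label="Step")
    btn = gr.components.Button("Run")
    btn.click(fn=run_sketch, inputs=[n_steps], outputs=[image, step_box])

# sketch.queue().launch()  # generator outputs stream only when the queue is enabled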