Andrei Cozma committed
Commit ed9cf21
1 Parent(s): de8a156
Files changed (1)
  1. demo.py +52 -21
demo.py CHANGED
@@ -7,6 +7,9 @@ from MonteCarloAgent import MonteCarloAgent
 import scipy.ndimage
 import cv2
 
+default_n_test_episodes = 10
+default_max_steps = 500
+
 # For the dropdown list of policies
 policies_folder = "policies"
 try:
@@ -22,6 +25,14 @@ agent_map = {
     "MonteCarloAgent": MonteCarloAgent,
     # TODO: Add DP Agent
 }
+action_map = {
+    "CliffWalking-v0": {
+        0: "up",
+        1: "right",
+        2: "down",
+        3: "left",
+    },
+}
 
 # Global variables to allow changing it on the fly
 live_render_fps = 10
@@ -64,15 +75,14 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
 
     agent = agent_map[agent_type](env_name, render_mode="rgb_array")
     agent.load_policy(policy_path)
+    env_action_map = action_map.get(env_name)
 
-    rgb_array = None
-    policy_viz = None
-    episode, step = 0, 0
-    state, action, reward = 0, 0, 0
+    solved, rgb_array, policy_viz = None, None, None
+    episode, step, state, action, reward = 0, 0, 0, 0, 0
     episodes_solved = 0
 
     def ep_str(episode):
-        return f"{episode + 1} / {n_test_episodes} ({(episode + 1) / n_test_episodes * 100:.2f}%)"
+        return f"{episode} / {n_test_episodes} ({(episode + 1) / n_test_episodes * 100:.2f}%)"
 
     def step_str(step):
         return f"{step + 1}"
@@ -86,9 +96,6 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
             while live_paused:
                 time.sleep(0.1)
 
-            if solved:
-                episodes_solved += 1
-
            state, action, reward = episode_hist[-1]
            curr_policy = agent.Pi[state]
 
@@ -110,34 +117,58 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
                 1.0,
             )
 
-            text_offset = 15
             cv2.putText(
                 policy_viz,
                 str(action),
                 (
-                    int((action + 0.5) * viz_w // len(curr_policy) - text_offset),
-                    viz_h // 2 + text_offset,
+                    int((action + 0.5) * viz_w // len(curr_policy) - 8),
+                    viz_h // 2 - 10,
                 ),
                 cv2.FONT_HERSHEY_SIMPLEX,
-                1.5,
+                1.0,
                 1.0,
                 2,
                 cv2.LINE_AA,
             )
 
+            if env_action_map:
+                action_name = env_action_map.get(action, action)
+
+                cv2.putText(
+                    policy_viz,
+                    action_name,
+                    (
+                        int(
+                            (action + 0.5) * viz_w // len(curr_policy)
+                            - 5 * len(action_name)
+                        ),
+                        viz_h // 2 + 20,
+                    ),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.6,
+                    1.0,
+                    2,
+                    cv2.LINE_AA,
+                )
+
             print(
-                f"Episode: {ep_str(episode)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (epsilon: {live_epsilon:.2f}) (frame time: {1 / render_fps:.2f}s)"
+                f"Episode: {ep_str(episode + 1)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (epsilon: {live_epsilon:.2f}) (frame time: {1 / render_fps:.2f}s)"
             )
 
             # Live-update the agent's epsilon value for demonstration purposes
             agent.epsilon = live_epsilon
-            yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
-                episodes_solved
-            ), step_str(step), state, action, reward, "Running..."
+            yield agent_type, env_name, rgb_array, policy_viz, ep_str(
+                episode + 1
+            ), ep_str(episodes_solved), step_str(
+                step
+            ), state, action, reward, "Running..."
 
             time.sleep(1 / live_render_fps)
 
-    yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode), ep_str(
+        if solved:
+            episodes_solved += 1
+
+    yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode + 1), ep_str(
         episodes_solved
     ), step_str(step), state, action, reward, "Done!"
 
@@ -162,14 +193,14 @@ with gr.Blocks(title="CS581 Demo") as demo:
     with gr.Row():
         input_n_test_episodes = gr.components.Slider(
             minimum=1,
-            maximum=500,
-            value=500,
+            maximum=1000,
+            value=default_n_test_episodes,
             label="Number of episodes",
         )
         input_max_steps = gr.components.Slider(
            minimum=1,
-            maximum=500,
-            value=500,
+            maximum=1000,
+            value=default_max_steps,
            label="Max steps per episode",
        )
 
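The most visible change in this commit is the human-readable action-name overlay drawn onto the policy visualization strip for CliffWalking-v0. The sketch below reproduces that drawing logic in isolation so it can be checked outside the Gradio app; the canvas size, the example action probabilities, and the output filename are illustrative assumptions, not part of demo.py.

```python
# Minimal sketch of the per-action overlay added in this commit, assuming a
# single-channel float32 policy strip. Canvas size and probabilities are
# hypothetical; only the two cv2.putText calls mirror the committed code.
import cv2
import numpy as np

action_map = {0: "up", 1: "right", 2: "down", 3: "left"}  # CliffWalking-v0
curr_policy = np.array([0.1, 0.7, 0.1, 0.1])  # example action probabilities
viz_w, viz_h = 512, 128
policy_viz = np.zeros((viz_h, viz_w), dtype=np.float32)

for action, prob in enumerate(curr_policy):
    # Shade each action's cell by its probability.
    x0 = action * viz_w // len(curr_policy)
    x1 = (action + 1) * viz_w // len(curr_policy)
    policy_viz[:, x0:x1] = prob

    # Action index, roughly centered in the upper half of the cell.
    cv2.putText(
        policy_viz,
        str(action),
        (int((action + 0.5) * viz_w // len(curr_policy) - 8), viz_h // 2 - 10),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        1.0,
        2,
        cv2.LINE_AA,
    )

    # Human-readable action name below the index, shifted left ~5 px per character.
    name = action_map.get(action, str(action))
    cv2.putText(
        policy_viz,
        name,
        (
            int((action + 0.5) * viz_w // len(curr_policy) - 5 * len(name)),
            viz_h // 2 + 20,
        ),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        1.0,
        2,
        cv2.LINE_AA,
    )

cv2.imwrite("policy_viz.png", (policy_viz * 255).astype(np.uint8))
```

The `- 5 * len(action_name)` term appears to be a rough centering heuristic: each character of the 0.6-scale Hershey font is treated as about 10 px wide, so shifting left by half of that keeps the label approximately centered under the action index.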