Andrei Cozma committed
Commit · ed9cf21
1 Parent(s): de8a156
Updates
demo.py
CHANGED
@@ -7,6 +7,9 @@ from MonteCarloAgent import MonteCarloAgent
 import scipy.ndimage
 import cv2
 
+default_n_test_episodes = 10
+default_max_steps = 500
+
 # For the dropdown list of policies
 policies_folder = "policies"
 try:
@@ -22,6 +25,14 @@ agent_map = {
     "MonteCarloAgent": MonteCarloAgent,
     # TODO: Add DP Agent
 }
+action_map = {
+    "CliffWalking-v0": {
+        0: "up",
+        1: "right",
+        2: "down",
+        3: "left",
+    },
+}
 
 # Global variables to allow changing it on the fly
 live_render_fps = 10
@@ -64,15 +75,14 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
 
     agent = agent_map[agent_type](env_name, render_mode="rgb_array")
     agent.load_policy(policy_path)
+    env_action_map = action_map.get(env_name)
 
-    rgb_array = None
-
-    episode, step = 0, 0
-    state, action, reward = 0, 0, 0
+    solved, rgb_array, policy_viz = None, None, None
+    episode, step, state, action, reward = 0, 0, 0, 0, 0
     episodes_solved = 0
 
     def ep_str(episode):
-        return f"{episode
+        return f"{episode} / {n_test_episodes} ({(episode + 1) / n_test_episodes * 100:.2f}%)"
 
     def step_str(step):
         return f"{step + 1}"
@@ -86,9 +96,6 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
             while live_paused:
                 time.sleep(0.1)
 
-            if solved:
-                episodes_solved += 1
-
             state, action, reward = episode_hist[-1]
             curr_policy = agent.Pi[state]
 
@@ -110,34 +117,58 @@ def run(policy_fname, n_test_episodes, max_steps, render_fps, epsilon):
                 1.0,
             )
 
-            text_offset = 15
             cv2.putText(
                 policy_viz,
                 str(action),
                 (
-                    int((action + 0.5) * viz_w // len(curr_policy) -
-                    viz_h // 2
+                    int((action + 0.5) * viz_w // len(curr_policy) - 8),
+                    viz_h // 2 - 10,
                 ),
                 cv2.FONT_HERSHEY_SIMPLEX,
-                1.
+                1.0,
                 1.0,
                 2,
                 cv2.LINE_AA,
             )
 
+            if env_action_map:
+                action_name = env_action_map.get(action, action)
+
+                cv2.putText(
+                    policy_viz,
+                    action_name,
+                    (
+                        int(
+                            (action + 0.5) * viz_w // len(curr_policy)
+                            - 5 * len(action_name)
+                        ),
+                        viz_h // 2 + 20,
+                    ),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.6,
+                    1.0,
+                    2,
+                    cv2.LINE_AA,
+                )
+
             print(
-                f"Episode: {ep_str(episode)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (epsilon: {live_epsilon:.2f}) (frame time: {1 / render_fps:.2f}s)"
+                f"Episode: {ep_str(episode + 1)} - step: {step_str(step)} - state: {state} - action: {action} - reward: {reward} (epsilon: {live_epsilon:.2f}) (frame time: {1 / render_fps:.2f}s)"
            )
 
             # Live-update the agent's epsilon value for demonstration purposes
             agent.epsilon = live_epsilon
-            yield agent_type, env_name, rgb_array, policy_viz, ep_str(
-
-            ),
+            yield agent_type, env_name, rgb_array, policy_viz, ep_str(
+                episode + 1
+            ), ep_str(episodes_solved), step_str(
+                step
+            ), state, action, reward, "Running..."
 
             time.sleep(1 / live_render_fps)
 
-
+        if solved:
+            episodes_solved += 1
+
+    yield agent_type, env_name, rgb_array, policy_viz, ep_str(episode + 1), ep_str(
         episodes_solved
     ), step_str(step), state, action, reward, "Done!"
 
@@ -162,14 +193,14 @@ with gr.Blocks(title="CS581 Demo") as demo:
     with gr.Row():
         input_n_test_episodes = gr.components.Slider(
             minimum=1,
-            maximum=
-            value=
+            maximum=1000,
+            value=default_n_test_episodes,
             label="Number of episodes",
         )
        input_max_steps = gr.components.Slider(
            minimum=1,
-            maximum=
-            value=
+            maximum=1000,
+            value=default_max_steps,
            label="Max steps per episode",
        )
 
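For context, the snippet below is a standalone sketch (not part of the commit) of the new action-name overlay: an action_map lookup selects a human-readable label for the chosen action and cv2.putText draws it under the numeric action id on the policy strip. The helper name annotate_policy, the strip dimensions, and the grayscale color value are illustrative assumptions, not code from demo.py.

import cv2
import numpy as np

# Same environment-to-action-name mapping introduced by the commit.
action_map = {"CliffWalking-v0": {0: "up", 1: "right", 2: "down", 3: "left"}}


def annotate_policy(curr_policy, action, env_name, viz_w=512, viz_h=96):
    # Blank single-channel "policy visualization" strip, one cell per action.
    policy_viz = np.zeros((viz_h, viz_w), dtype=np.float32)
    env_action_map = action_map.get(env_name)

    # Numeric action id, roughly centered in its cell (as in the commit).
    cv2.putText(
        policy_viz,
        str(action),
        (int((action + 0.5) * viz_w // len(curr_policy) - 8), viz_h // 2 - 10),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        1.0,
        2,
        cv2.LINE_AA,
    )

    # Human-readable name underneath, if this environment has a mapping.
    if env_action_map:
        action_name = str(env_action_map.get(action, action))
        cv2.putText(
            policy_viz,
            action_name,
            (
                int((action + 0.5) * viz_w // len(curr_policy) - 5 * len(action_name)),
                viz_h // 2 + 20,
            ),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            1.0,
            2,
            cv2.LINE_AA,
        )
    return policy_viz


# Example: annotate the "right" action for CliffWalking-v0 over a 4-action policy.
viz = annotate_policy([0.1, 0.7, 0.1, 0.1], action=1, env_name="CliffWalking-v0")
print(viz.shape)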