Andrei Cozma committed
Commit ec8233c · 1 Parent(s): 46b0409
Updates
Browse files
- AgentBase.py +15 -18
- MCAgent.py +1 -1
- demo.py +27 -14
- policies/{DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy → DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e1500_s200.npy} +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:76291_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:78250_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:48627_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:80137_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:10976_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:62659_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:35280_e1500_s200.npy +0 -0
- policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy} +0 -0
- policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy} +0 -0
- policies/DPAgent_Taxi-v3_gamma:0.99_epsilon:0.4_e15000_s200.npy +0 -0
- policies/{MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy → MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy} +0 -0
- policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy} +0 -0
- policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy} +0 -0
- policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy} +0 -0
- policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy +0 -0
AgentBase.py
CHANGED
@@ -82,19 +82,22 @@ class AgentBase:
 
     def generate_episode(self, max_steps=500, render=False, **kwargs):
         state, _ = self.env.reset()
-        )
+        # action = self.choose_action(state, **kwargs)
+        episode_hist = []
+        solved, done = False, False
+        rgb_array = self.env.render() if render else None
 
+        i = 0
         # Generate an episode following the current policy
-        #
+        while i < max_steps and not solved and not done:
+            # Render the environment if needed
+            rgb_array = self.env.render() if render else None
+            # Sample the next action from the policy
             action = self.choose_action(state, **kwargs)
+            # Keeping track of the trajectory
+            episode_hist.append((state, action, None))
             # Take the action and observe the reward and next state
             next_state, reward, done, _, _ = self.env.step(action)
             if self.env_name == "FrozenLake-v1":
                 if done:
                     reward = 100 if reward == 1 else -10
@@ -102,33 +105,27 @@ class AgentBase:
                 reward = -1
 
             # Keeping track of the trajectory
-            episode_hist
+            episode_hist[-1] = (state, action, reward)
+            # Generate the output at intermediate steps for the demo
             yield episode_hist, solved, rgb_array
 
-            # Rendering new frame if needed
-            rgb_array = self.env.render() if render else None
             # For CliffWalking-v0 and Taxi-v3, the episode is solved when it terminates
             if done and self.env_name in ["CliffWalking-v0", "Taxi-v3"]:
                 solved = True
-                break
 
             # For FrozenLake-v1, the episode terminates when the agent moves into a hole or reaches the goal
             # We consider the episode solved when the agent reaches the goal
             if done and self.env_name == "FrozenLake-v1":
                 if next_state == self.env.nrow * self.env.ncol - 1:
                     solved = True
-                    break
                 else:
                     # Instead of terminating the episode when the agent moves into a hole, we reset the environment
                     # This is to keep consistent with the other environments
-                    done = False
+                    done, solved = False, False
                     next_state, _ = self.env.reset()
 
-            if solved or done:
-                break
             state = next_state
+            i += 1
 
         rgb_array = self.env.render() if render else None
         yield episode_hist, solved, rgb_array
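
The refactored generator yields the running trajectory after every step instead of only at the end, which is what lets the demo animate intermediate states. Below is a minimal consumption sketch (not part of the commit); it assumes `agent` is an already-trained AgentBase subclass such as MCAgent.

    # Minimal usage sketch (assumes `agent` is a trained AgentBase subclass, e.g. MCAgent).
    # Each yield exposes the trajectory so far as a list of (state, action, reward) tuples.
    for episode_hist, solved, rgb_array in agent.generate_episode(max_steps=200, render=False):
        state, action, reward = episode_hist[-1]  # most recent transition
        # rgb_array is None here because render=False; the demo passes render=True

    episode_return = sum(r for _, _, r in episode_hist)
    print(f"steps={len(episode_hist)}  solved={solved}  return={episode_return}")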
MCAgent.py
CHANGED
@@ -140,7 +140,7 @@ class MCAgent(AgentBase):
         if log_wandb:
             wandb.log(stats)
 
-        if test_running_success_rate > 0.
+        if test_running_success_rate > 0.99:
             if save_best:
                 if self.run_name is None:
                     print("WARNING: run_name is None, not saving best policy.")
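
The guard only saves a policy once the running success rate on test episodes clears 0.99. As a rough illustration of the kind of statistic being compared against the threshold (the exponential-moving-average form and helper name below are assumptions for the sketch, not code from MCAgent.py):

    # Hypothetical sketch of a running success rate; MCAgent.py's own update rule is not
    # shown in this commit, so the smoothing form below is an assumption.
    def update_running_success_rate(running, solved, alpha=0.05):
        return (1 - alpha) * running + alpha * (1.0 if solved else 0.0)

    test_running_success_rate = 0.0
    for solved in [True] * 150 + [False] + [True] * 100:  # toy sequence of test outcomes
        test_running_success_rate = update_running_success_rate(test_running_success_rate, solved)

    if test_running_success_rate > 0.99:  # the 0.99 threshold from this commit
        print("would save the best policy here")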
demo.py
CHANGED
@@ -15,18 +15,10 @@ default_epsilon = 0.0
 default_paused = True
 
 frame_env_h, frame_env_w = 512, 768
-frame_policy_res =
+frame_policy_res = 512
 
 # For the dropdown list of policies
 policies_folder = "policies"
-try:
-    all_policies = [
-        file for file in os.listdir(policies_folder) if file.endswith(".npy")
-    ]
-    all_policies.sort()
-except FileNotFoundError:
-    print("ERROR: No policies folder found!")
-    all_policies = []
 
 
 action_map = {
@@ -42,6 +34,14 @@ action_map = {
         2: "right",
         3: "up",
     },
+    "Taxi-v3": {
+        0: "down",
+        1: "up",
+        2: "right",
+        3: "left",
+        4: "pickup",
+        5: "dropoff",
+    },
 }
 
 
@@ -168,7 +168,7 @@ def run(
         return f"{step + 1}"
 
     for episode in range(n_test_episodes):
-        time.sleep(0.
+        time.sleep(0.5)
 
         for step, (episode_hist, solved, frame_env) in enumerate(
            agent.generate_episode(
@@ -208,7 +208,11 @@ def run(
            frame_policy_label_color = 1.0 - frame_policy[label_loc_h, label_loc_w]
            frame_policy_label_font = cv2.FONT_HERSHEY_SIMPLEX
            frame_policy_label_thicc = 1
-            action_text_scale, action_text_label_scale = 0
+            action_text_scale, action_text_label_scale = 1.0, 0.6
+            # These scales are for policies that have length 4
+            # Longer policies should have smaller scales
+            action_text_scale *= 4 / len(curr_policy)
+            action_text_label_scale *= 4 / len(curr_policy)
 
            (label_width, label_height), _ = cv2.getTextSize(
                str(action),
@@ -305,15 +309,24 @@ def run(
        if solved:
            episodes_solved += 1
 
-        time.sleep(0.
+        time.sleep(0.5)
 
    localstate.current_policy = None
    yield localstate, agent_key, env_key, frame_env, frame_policy, ep_str(
        episode + 1
-    ), ep_str(episodes_solved), step_str(step), state, action,
+    ), ep_str(episodes_solved), step_str(step), state, action, last_reward, "Done!"
 
 
 with gr.Blocks(title="CS581 Demo") as demo:
+    try:
+        all_policies = [
+            file for file in os.listdir(policies_folder) if file.endswith(".npy")
+        ]
+        all_policies.sort()
+    except FileNotFoundError:
+        print("ERROR: No policies folder found!")
+        all_policies = []
+
     gr.components.HTML(
         "<h1>CS581 Final Project Demo - Dynamic Programming & Monte-Carlo RL Methods (<a href='https://huggingface.co/spaces/acozma/CS581-Algos-Demo'>HF Space</a>)</h1>"
     )
@@ -358,7 +371,7 @@ with gr.Blocks(title="CS581 Demo") as demo:
    with gr.Row():
        out_state = gr.components.Textbox(label="Current State")
        out_action = gr.components.Textbox(label="Chosen Action")
-        out_reward = gr.components.Textbox(label="
+        out_reward = gr.components.Textbox(label="Reward Received")
 
    out_image_policy = gr.components.Image(
        label="Action Sampled vs Policy Distribution for Current State",
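
The dropdown is populated from the .npy filenames in the policies folder, which encode the agent, environment, and hyperparameters. A small parsing sketch follows; the helper below is hypothetical and not a function in demo.py.

    import os

    def parse_policy_filename(fname):
        """Hypothetical helper: split a policy filename such as
        'MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy'
        into (agent, env, {hyperparameter: value})."""
        parts = os.path.splitext(fname)[0].split("_")
        agent, env = parts[0], parts[1]
        params = {p.split(":", 1)[0]: p.split(":", 1)[1] for p in parts[2:] if ":" in p}
        return agent, env, params

    print(parse_policy_filename("MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy"))
    # -> ('MCAgent', 'Taxi-v3', {'gamma': '1.0', 'epsilon': '0.75'})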
policies/{DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy → DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e1500_s200.npy}
RENAMED
File without changes

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:76291_e2500_s200.npy
DELETED
Binary file (8.32 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:78250_e2500_s200.npy
DELETED
Binary file (8.32 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:48627_e2500_s200.npy
DELETED
Binary file (32.9 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:80137_e2500_s200.npy
DELETED
Binary file (32.9 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:10976_e2500_s200.npy
DELETED
Binary file (131 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:62659_e2500_s200.npy
DELETED
Binary file (131 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:35280_e1500_s200.npy
ADDED
Binary file (2.18 kB)

policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy}
RENAMED
Binary files a/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy and b/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy differ

policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy}
RENAMED
Binary files a/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy and b/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy differ

policies/DPAgent_Taxi-v3_gamma:0.99_epsilon:0.4_e15000_s200.npy
ADDED
Binary file (24.1 kB)

policies/{MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy → MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy and b/policies/MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy differ

policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy differ

policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy differ

policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy differ

policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy
ADDED
Binary file (24.1 kB)
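
The policy files above are plain NumPy arrays saved by the agents. A loading sketch follows (assumptions: the array is indexed by state and holds per-action probabilities; the exact on-disk layout is defined by the agents' save/load code, which is not part of this diff):

    import numpy as np

    # Assumed layout: policy[state] is a probability distribution over the actions.
    policy = np.load("policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy")

    state = 0
    greedy_action = int(np.argmax(policy[state]))                                 # exploit the learned policy
    sampled_action = int(np.random.choice(len(policy[state]), p=policy[state]))   # sample from the distribution
    print(policy.shape, greedy_action, sampled_action)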