Andrei Cozma committed on
Commit ec8233c · 1 Parent(s): 46b0409
Files changed (19)
  1. AgentBase.py +15 -18
  2. MCAgent.py +1 -1
  3. demo.py +27 -14
  4. policies/{DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy → DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e1500_s200.npy} +0 -0
  5. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:76291_e2500_s200.npy +0 -0
  6. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:78250_e2500_s200.npy +0 -0
  7. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:48627_e2500_s200.npy +0 -0
  8. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:80137_e2500_s200.npy +0 -0
  9. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:10976_e2500_s200.npy +0 -0
  10. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:62659_e2500_s200.npy +0 -0
  11. policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:35280_e1500_s200.npy +0 -0
  12. policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy} +0 -0
  13. policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy} +0 -0
  14. policies/DPAgent_Taxi-v3_gamma:0.99_epsilon:0.4_e15000_s200.npy +0 -0
  15. policies/{MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy → MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy} +0 -0
  16. policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy} +0 -0
  17. policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy} +0 -0
  18. policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy} +0 -0
  19. policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy +0 -0
AgentBase.py CHANGED
@@ -82,19 +82,22 @@ class AgentBase:
 
     def generate_episode(self, max_steps=500, render=False, **kwargs):
         state, _ = self.env.reset()
-        episode_hist, solved, rgb_array = (
-            [],
-            False,
-            self.env.render() if render else None,
-        )
+        # action = self.choose_action(state, **kwargs)
+        episode_hist = []
+        solved, done = False, False
+        rgb_array = self.env.render() if render else None
 
+        i = 0
         # Generate an episode following the current policy
-        for _ in range(max_steps):
-            # Sample an action from the policy
+        while i < max_steps and not solved and not done:
+            # Render the environment if needed
+            rgb_array = self.env.render() if render else None
+            # Sample the next action from the policy
             action = self.choose_action(state, **kwargs)
+            # Keeping track of the trajectory
+            episode_hist.append((state, action, None))
             # Take the action and observe the reward and next state
             next_state, reward, done, _, _ = self.env.step(action)
-
             if self.env_name == "FrozenLake-v1":
                 if done:
                     reward = 100 if reward == 1 else -10
@@ -102,33 +105,27 @@ class AgentBase:
                 reward = -1
 
             # Keeping track of the trajectory
-            episode_hist.append((state, action, reward))
+            episode_hist[-1] = (state, action, reward)
+            # Generate the output at intermediate steps for the demo
             yield episode_hist, solved, rgb_array
 
-            # Rendering new frame if needed
-            rgb_array = self.env.render() if render else None
-
             # For CliffWalking-v0 and Taxi-v3, the episode is solved when it terminates
             if done and self.env_name in ["CliffWalking-v0", "Taxi-v3"]:
                 solved = True
-                break
 
             # For FrozenLake-v1, the episode terminates when the agent moves into a hole or reaches the goal
             # We consider the episode solved when the agent reaches the goal
             if done and self.env_name == "FrozenLake-v1":
                 if next_state == self.env.nrow * self.env.ncol - 1:
                     solved = True
-                    break
                 else:
                     # Instead of terminating the episode when the agent moves into a hole, we reset the environment
                     # This is to keep consistent with the other environments
-                    done = False
+                    done, solved = False, False
                     next_state, _ = self.env.reset()
 
-            if solved or done:
-                break
-
             state = next_state
+            i += 1
 
         rgb_array = self.env.render() if render else None
         yield episode_hist, solved, rgb_array
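
Note: the rewritten generator yields after every step (and once more after the loop ends), appending a (state, action, None) placeholder before env.step and patching the reward in afterwards, so a consumer always sees the in-flight step. A minimal consumption sketch, assuming `agent` is an AgentBase subclass from this repo; `show` is a hypothetical display callback, not part of the codebase:

def replay(agent, show, max_steps=200):
    """Drive one rendered episode and return its trajectory and outcome."""
    last_hist, last_solved = [], False
    # Each yield carries the trajectory so far, the solved flag, and an
    # RGB frame (None when render=False); the generator yields one final
    # time after the loop with the last frame.
    for episode_hist, solved, rgb_array in agent.generate_episode(
        max_steps=max_steps, render=True
    ):
        if rgb_array is not None:
            show(rgb_array)
        last_hist, last_solved = episode_hist, solved
    return last_hist, last_solved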
MCAgent.py CHANGED
@@ -140,7 +140,7 @@ class MCAgent(AgentBase):
         if log_wandb:
             wandb.log(stats)
 
-        if test_running_success_rate > 0.99999:
+        if test_running_success_rate > 0.99:
             if save_best:
                 if self.run_name is None:
                     print("WARNING: run_name is None, not saving best policy.")
demo.py CHANGED
@@ -15,18 +15,10 @@ default_epsilon = 0.0
 default_paused = True
 
 frame_env_h, frame_env_w = 512, 768
-frame_policy_res = 256
+frame_policy_res = 512
 
 # For the dropdown list of policies
 policies_folder = "policies"
-try:
-    all_policies = [
-        file for file in os.listdir(policies_folder) if file.endswith(".npy")
-    ]
-    all_policies.sort()
-except FileNotFoundError:
-    print("ERROR: No policies folder found!")
-    all_policies = []
 
 
 action_map = {
@@ -42,6 +34,14 @@ action_map = {
         2: "right",
         3: "up",
     },
+    "Taxi-v3": {
+        0: "down",
+        1: "up",
+        2: "right",
+        3: "left",
+        4: "pickup",
+        5: "dropoff",
+    },
 }
 
 
@@ -168,7 +168,7 @@ def run(
         return f"{step + 1}"
 
     for episode in range(n_test_episodes):
-        time.sleep(0.25)
+        time.sleep(0.5)
 
        for step, (episode_hist, solved, frame_env) in enumerate(
            agent.generate_episode(
@@ -208,7 +208,11 @@
            frame_policy_label_color = 1.0 - frame_policy[label_loc_h, label_loc_w]
            frame_policy_label_font = cv2.FONT_HERSHEY_SIMPLEX
            frame_policy_label_thicc = 1
-            action_text_scale, action_text_label_scale = 0.6, 0.3
+            action_text_scale, action_text_label_scale = 1.0, 0.6
+            # These scales are for policies that have length 4
+            # Longer policies should have smaller scales
+            action_text_scale *= 4 / len(curr_policy)
+            action_text_label_scale *= 4 / len(curr_policy)
 
            (label_width, label_height), _ = cv2.getTextSize(
                str(action),
@@ -305,15 +309,24 @@
        if solved:
            episodes_solved += 1
 
-        time.sleep(0.25)
+        time.sleep(0.5)
 
    localstate.current_policy = None
    yield localstate, agent_key, env_key, frame_env, frame_policy, ep_str(
        episode + 1
-    ), ep_str(episodes_solved), step_str(step), state, action, reward, "Done!"
+    ), ep_str(episodes_solved), step_str(step), state, action, last_reward, "Done!"
 
 
 with gr.Blocks(title="CS581 Demo") as demo:
+    try:
+        all_policies = [
+            file for file in os.listdir(policies_folder) if file.endswith(".npy")
+        ]
+        all_policies.sort()
+    except FileNotFoundError:
+        print("ERROR: No policies folder found!")
+        all_policies = []
+
     gr.components.HTML(
         "<h1>CS581 Final Project Demo - Dynamic Programming & Monte-Carlo RL Methods (<a href='https://huggingface.co/spaces/acozma/CS581-Algos-Demo'>HF Space</a>)</h1>"
     )
@@ -358,7 +371,7 @@
     with gr.Row():
         out_state = gr.components.Textbox(label="Current State")
         out_action = gr.components.Textbox(label="Chosen Action")
-        out_reward = gr.components.Textbox(label="Last Reward")
+        out_reward = gr.components.Textbox(label="Reward Received")
 
     out_image_policy = gr.components.Image(
         label="Action Sampled vs Policy Distribution for Current State",
policies/{DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy → DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e1500_s200.npy} RENAMED
File without changes
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:76291_e2500_s200.npy DELETED
Binary file (8.32 kB)
 
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:78250_e2500_s200.npy DELETED
Binary file (8.32 kB)
 
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:48627_e2500_s200.npy DELETED
Binary file (32.9 kB)
 
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:80137_e2500_s200.npy DELETED
Binary file (32.9 kB)
 
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:10976_e2500_s200.npy DELETED
Binary file (131 kB)
 
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:62659_e2500_s200.npy DELETED
Binary file (131 kB)
 
policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:35280_e1500_s200.npy ADDED
Binary file (2.18 kB)
 
policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy} RENAMED
Binary files a/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy and b/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy differ
 
policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy} RENAMED
Binary files a/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy and b/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy differ
 
policies/DPAgent_Taxi-v3_gamma:0.99_epsilon:0.4_e15000_s200.npy ADDED
Binary file (24.1 kB)
 
policies/{MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy → MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy} RENAMED
Binary files a/policies/MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy and b/policies/MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy differ
 
policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy} RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy differ
 
policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy} RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy differ
 
policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy} RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy differ
 
policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy ADDED
Binary file (24.1 kB)