Andrei Cozma committed
Commit ec8233c · 1 Parent(s): 46b0409
Updates
Browse files
- AgentBase.py +15 -18
- MCAgent.py +1 -1
- demo.py +27 -14
- policies/{DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy → DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e1500_s200.npy} +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:76291_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:78250_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:48627_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:80137_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:10976_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:62659_e2500_s200.npy +0 -0
- policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:35280_e1500_s200.npy +0 -0
- policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy} +0 -0
- policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy} +0 -0
- policies/DPAgent_Taxi-v3_gamma:0.99_epsilon:0.4_e15000_s200.npy +0 -0
- policies/{MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy → MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy} +0 -0
- policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy} +0 -0
- policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy} +0 -0
- policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy} +0 -0
- policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy +0 -0
AgentBase.py
CHANGED
@@ -82,19 +82,22 @@ class AgentBase:
 
     def generate_episode(self, max_steps=500, render=False, **kwargs):
         state, _ = self.env.reset()
-        )
+        # action = self.choose_action(state, **kwargs)
+        episode_hist = []
+        solved, done = False, False
+        rgb_array = self.env.render() if render else None
 
+        i = 0
         # Generate an episode following the current policy
-        #
+        while i < max_steps and not solved and not done:
+            # Render the environment if needed
+            rgb_array = self.env.render() if render else None
+            # Sample the next action from the policy
             action = self.choose_action(state, **kwargs)
+            # Keeping track of the trajectory
+            episode_hist.append((state, action, None))
             # Take the action and observe the reward and next state
             next_state, reward, done, _, _ = self.env.step(action)
             if self.env_name == "FrozenLake-v1":
                 if done:
                     reward = 100 if reward == 1 else -10
@@ -102,33 +105,27 @@ class AgentBase:
                 reward = -1
 
             # Keeping track of the trajectory
-            episode_hist
+            episode_hist[-1] = (state, action, reward)
+            # Generate the output at intermediate steps for the demo
             yield episode_hist, solved, rgb_array
 
-            # Rendering new frame if needed
-            rgb_array = self.env.render() if render else None
             # For CliffWalking-v0 and Taxi-v3, the episode is solved when it terminates
             if done and self.env_name in ["CliffWalking-v0", "Taxi-v3"]:
                 solved = True
-                break
 
             # For FrozenLake-v1, the episode terminates when the agent moves into a hole or reaches the goal
             # We consider the episode solved when the agent reaches the goal
             if done and self.env_name == "FrozenLake-v1":
                 if next_state == self.env.nrow * self.env.ncol - 1:
                     solved = True
-                    break
                 else:
                     # Instead of terminating the episode when the agent moves into a hole, we reset the environment
                     # This is to keep consistent with the other environments
-                    done = False
+                    done, solved = False, False
                     next_state, _ = self.env.reset()
 
-            if solved or done:
-                break
             state = next_state
+            i += 1
 
         rgb_array = self.env.render() if render else None
         yield episode_hist, solved, rgb_array
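
The refactored generator yields the running trajectory after every step instead of only at the end, which is what lets the demo animate intermediate states. Below is a minimal consumption sketch (not part of the commit); it assumes `agent` is an already-trained AgentBase subclass such as MCAgent.

    # Minimal usage sketch (assumes `agent` is a trained AgentBase subclass, e.g. MCAgent).
    # Each yield exposes the trajectory so far as a list of (state, action, reward) tuples.
    for episode_hist, solved, rgb_array in agent.generate_episode(max_steps=200, render=False):
        state, action, reward = episode_hist[-1]  # most recent transition
        # rgb_array is None here because render=False; the demo passes render=True

    episode_return = sum(r for _, _, r in episode_hist)
    print(f"steps={len(episode_hist)}  solved={solved}  return={episode_return}")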
MCAgent.py
CHANGED
@@ -140,7 +140,7 @@ class MCAgent(AgentBase):
         if log_wandb:
             wandb.log(stats)
 
-        if test_running_success_rate > 0.
+        if test_running_success_rate > 0.99:
             if save_best:
                 if self.run_name is None:
                     print("WARNING: run_name is None, not saving best policy.")
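
The guard only saves a policy once the running success rate on test episodes clears 0.99. As a rough illustration of the kind of statistic being compared against the threshold (the exponential-moving-average form and helper name below are assumptions for the sketch, not code from MCAgent.py):

    # Hypothetical sketch of a running success rate; MCAgent.py's own update rule is not
    # shown in this commit, so the smoothing form below is an assumption.
    def update_running_success_rate(running, solved, alpha=0.05):
        return (1 - alpha) * running + alpha * (1.0 if solved else 0.0)

    test_running_success_rate = 0.0
    for solved in [True] * 150 + [False] + [True] * 100:  # toy sequence of test outcomes
        test_running_success_rate = update_running_success_rate(test_running_success_rate, solved)

    if test_running_success_rate > 0.99:  # the 0.99 threshold from this commit
        print("would save the best policy here")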
demo.py
CHANGED
@@ -15,18 +15,10 @@ default_epsilon = 0.0
 default_paused = True
 
 frame_env_h, frame_env_w = 512, 768
-frame_policy_res =
+frame_policy_res = 512
 
 # For the dropdown list of policies
 policies_folder = "policies"
-try:
-    all_policies = [
-        file for file in os.listdir(policies_folder) if file.endswith(".npy")
-    ]
-    all_policies.sort()
-except FileNotFoundError:
-    print("ERROR: No policies folder found!")
-    all_policies = []
 
 
 action_map = {
@@ -42,6 +34,14 @@ action_map = {
         2: "right",
         3: "up",
     },
+    "Taxi-v3": {
+        0: "down",
+        1: "up",
+        2: "right",
+        3: "left",
+        4: "pickup",
+        5: "dropoff",
+    },
 }
 
 
@@ -168,7 +168,7 @@ def run(
         return f"{step + 1}"
 
     for episode in range(n_test_episodes):
-        time.sleep(0.
+        time.sleep(0.5)
 
         for step, (episode_hist, solved, frame_env) in enumerate(
            agent.generate_episode(
@@ -208,7 +208,11 @@ def run(
            frame_policy_label_color = 1.0 - frame_policy[label_loc_h, label_loc_w]
            frame_policy_label_font = cv2.FONT_HERSHEY_SIMPLEX
            frame_policy_label_thicc = 1
-            action_text_scale, action_text_label_scale = 0
+            action_text_scale, action_text_label_scale = 1.0, 0.6
+            # These scales are for policies that have length 4
+            # Longer policies should have smaller scales
+            action_text_scale *= 4 / len(curr_policy)
+            action_text_label_scale *= 4 / len(curr_policy)
 
            (label_width, label_height), _ = cv2.getTextSize(
                str(action),
@@ -305,15 +309,24 @@ def run(
        if solved:
            episodes_solved += 1
 
-        time.sleep(0.
+        time.sleep(0.5)
 
    localstate.current_policy = None
    yield localstate, agent_key, env_key, frame_env, frame_policy, ep_str(
        episode + 1
-    ), ep_str(episodes_solved), step_str(step), state, action,
+    ), ep_str(episodes_solved), step_str(step), state, action, last_reward, "Done!"
 
 
 with gr.Blocks(title="CS581 Demo") as demo:
+    try:
+        all_policies = [
+            file for file in os.listdir(policies_folder) if file.endswith(".npy")
+        ]
+        all_policies.sort()
+    except FileNotFoundError:
+        print("ERROR: No policies folder found!")
+        all_policies = []
+
     gr.components.HTML(
         "<h1>CS581 Final Project Demo - Dynamic Programming & Monte-Carlo RL Methods (<a href='https://huggingface.co/spaces/acozma/CS581-Algos-Demo'>HF Space</a>)</h1>"
     )
@@ -358,7 +371,7 @@ with gr.Blocks(title="CS581 Demo") as demo:
    with gr.Row():
        out_state = gr.components.Textbox(label="Current State")
        out_action = gr.components.Textbox(label="Chosen Action")
-        out_reward = gr.components.Textbox(label="
+        out_reward = gr.components.Textbox(label="Reward Received")
 
    out_image_policy = gr.components.Image(
        label="Action Sampled vs Policy Distribution for Current State",
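
The dropdown is populated from the .npy filenames in the policies folder, which encode the agent, environment, and hyperparameters. A small parsing sketch follows; the helper below is hypothetical and not a function in demo.py.

    import os

    def parse_policy_filename(fname):
        """Hypothetical helper: split a policy filename such as
        'MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy'
        into (agent, env, {hyperparameter: value})."""
        parts = os.path.splitext(fname)[0].split("_")
        agent, env = parts[0], parts[1]
        params = {p.split(":", 1)[0]: p.split(":", 1)[1] for p in parts[2:] if ":" in p}
        return agent, env, params

    print(parse_policy_filename("MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy"))
    # -> ('MCAgent', 'Taxi-v3', {'gamma': '1.0', 'epsilon': '0.75'})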
policies/{DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy → DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e1500_s200.npy}
RENAMED
File without changes

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:76291_e2500_s200.npy
DELETED
Binary file (8.32 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:16_seed:78250_e2500_s200.npy
DELETED
Binary file (8.32 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:48627_e2500_s200.npy
DELETED
Binary file (32.9 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:32_seed:80137_e2500_s200.npy
DELETED
Binary file (32.9 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:10976_e2500_s200.npy
DELETED
Binary file (131 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:64_seed:62659_e2500_s200.npy
DELETED
Binary file (131 kB)

policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:35280_e1500_s200.npy
ADDED
Binary file (2.18 kB)

policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy}
RENAMED
Binary files a/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:97556_e2500_s200.npy and b/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:61252_e1500_s200.npy differ

policies/{DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy → DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy}
RENAMED
Binary files a/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:94515_e2500_s200.npy and b/policies/DPAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:96883_e1500_s200.npy differ

policies/DPAgent_Taxi-v3_gamma:0.99_epsilon:0.4_e15000_s200.npy
ADDED
Binary file (24.1 kB)

policies/{MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy → MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200_first_visit.npy and b/policies/MCAgent_CliffWalking-v0_gamma:1.0_epsilon:0.4_e1500_s200_first_visit.npy differ

policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:47783_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:35280_e1500_s200_first_visit.npy differ

policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:27843_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:61252_e1500_s200_first_visit.npy differ

policies/{MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy → MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy}
RENAMED
Binary files a/policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:10485_e2500_s200_first_visit.npy and b/policies/MCAgent_FrozenLake-v1_gamma:1.0_epsilon:0.4_size:8_seed:96883_e1500_s200_first_visit.npy differ

policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy
ADDED
Binary file (24.1 kB)
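
The policy files above are plain NumPy arrays saved by the agents. A loading sketch follows (assumptions: the array is indexed by state and holds per-action probabilities; the exact on-disk layout is defined by the agents' save/load code, which is not part of this diff):

    import numpy as np

    # Assumed layout: policy[state] is a probability distribution over the actions.
    policy = np.load("policies/MCAgent_Taxi-v3_gamma:1.0_epsilon:0.75_e15000_s200_first_visit.npy")

    state = 0
    greedy_action = int(np.argmax(policy[state]))                                 # exploit the learned policy
    sampled_action = int(np.random.choice(len(policy[state]), p=policy[state]))   # sample from the distribution
    print(policy.shape, greedy_action, sampled_action)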