Spaces:

acozma
/

CS581-Algos-Demo

Sleeping

App Files Community

Andrei Cozma committed on Apr 23, 2023

Commit

e173b06

1 Parent(s): 3266489

Updates

Browse files

Files changed (6) hide show

agents.py +34 -4
demo.py +6 -24
policies/DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy +0 -0
policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.2_size:8_seed:33951_e2500_s200_first_visit.npy +0 -0
policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:16970_e2500_s200_first_visit.npy +0 -0
run.py +3 -2

agents.py CHANGED Viewed

@@ -1,14 +1,44 @@
 # All supported agents
 from MCAgent import MCAgent
 from DPAgent import DPAgent
 AGENTS_MAP = {"MCAgent": MCAgent, "DPAgent": DPAgent}
-def load_agent(agent_name, **kwargs):
-    if agent_name not in AGENTS_MAP:
         raise ValueError(
-            f"ERROR: Agent '{agent_name}' not valid. Must be one of: {AGENTS_MAP.keys()}"
         )
-    return AGENTS_MAP[agent_name](**kwargs)

 # All supported agents
+import os
 from MCAgent import MCAgent
 from DPAgent import DPAgent
+import warnings
 AGENTS_MAP = {"MCAgent": MCAgent, "DPAgent": DPAgent}
+def load_agent(agent_key, **kwargs):
+    agent_policy_file = agent_key if agent_key.endswith(".npy") else None
+    if agent_policy_file is not None:
+        props = os.path.basename(agent_key).split("_")
+        try:
+            agent_key, env_key = props[0], props[1]
+            agent_args = {}
+            for prop in props[2:]:
+                props_split = prop.split(":")
+                if len(props_split) == 2:
+                    agent_args[props_split[0]] = props_split[1]
+                else:
+                    warnings.warn(
+                        f"Skipping property {prop} as it does not have the format 'key:value'.",
+                        UserWarning,
+                    )
+            agent_args["env"] = env_key
+            kwargs.update(agent_args)
+            print("agent_args:", kwargs)
+        except IndexError:
+            raise ValueError(
+                f"ERROR: Could not parse agent properties. Must be of the format 'AgentName_EnvName_key:value_key:value...'."
+            )
+    if agent_key not in AGENTS_MAP:
         raise ValueError(
+            f"ERROR: Agent '{agent_key}' not valid. Must be one of: {AGENTS_MAP.keys()}"
         )
+    agent = AGENTS_MAP[agent_key](**kwargs)
+    if agent_policy_file is not None:
+        agent.load_policy(agent_policy_file)
+    return agent

demo.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import os
 import time
-import warnings
 import numpy as np
 import gradio as gr
 import scipy.ndimage
 import cv2
-from agents import AGENTS_MAP
 default_n_test_episodes = 10
 default_max_steps = 500
@@ -137,33 +136,16 @@ def run(
     print(f"- epsilon: {localstate.live_steps_forward}")
     policy_path = os.path.join(policies_folder, policy_fname)
-    props = policy_fname.split("_")
     try:
-        agent_key, env_key = props[0], props[1]
-        agent_args = {}
-        for prop in props[2:]:
-            props_split = prop.split(":")
-            if len(props_split) == 2:
-                agent_args[props_split[0]] = props_split[1]
-            else:
-                warnings.warn(
-                    f"Skipping property {prop} as it does not have the format 'key:value'.",
-                    UserWarning,
-                )
-    except IndexError:
         yield localstate, None, None, None, None, None, None, None, None, None, None, "🚫 Please select a valid policy file."
         return
-    agent_args.update(
-        {
-            "env": env_key,
-            "render_mode": "rgb_array",
-        }
-    )
-    print("agent_args:", agent_args)
-    agent = AGENTS_MAP[agent_key](**agent_args)
-    agent.load_policy(policy_path)
     env_action_map = action_map.get(env_key)
     solved, frame_env, frame_policy = None, None, None

 import os
 import time
 import numpy as np
 import gradio as gr
 import scipy.ndimage
 import cv2
+from agents import load_agent
 default_n_test_episodes = 10
 default_max_steps = 500
     print(f"- epsilon: {localstate.live_steps_forward}")
     policy_path = os.path.join(policies_folder, policy_fname)
     try:
+        agent = load_agent(
+            policy_path, return_agent_env_keys=True, render_mode="rgb_array"
+        )
+    except ValueError:
         yield localstate, None, None, None, None, None, None, None, None, None, None, "🚫 Please select a valid policy file."
         return
+    agent_key, env_key = agent.__class__.__name__, agent.env_name
     env_action_map = action_map.get(env_key)
     solved, frame_env, frame_policy = None, None, None

policies/DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy CHANGED Viewed

Binary files a/policies/DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy and b/policies/DPAgent_CliffWalking-v0_gamma:0.99_epsilon:0.4_e2500_s200.npy differ

policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.2_size:8_seed:33951_e2500_s200_first_visit.npy DELETED Viewed

Binary file (2.18 kB)

policies/MCAgent_FrozenLake-v1_gamma:0.99_epsilon:0.4_size:8_seed:16970_e2500_s200_first_visit.npy DELETED Viewed

Binary file (2.18 kB)

run.py CHANGED Viewed

@@ -138,7 +138,9 @@ def main():
     args = parser.parse_args()
     print(vars(args))
-    agent = load_agent(args.agent, **dict(args._get_kwargs()))
     agent.run_name += f"_e{args.n_train_episodes}_s{args.max_steps}"
     if args.wandb_run_name_suffix is not None:
@@ -169,7 +171,6 @@ def main():
             if not args.no_save:
                 agent.save_policy(save_dir=args.save_dir)
         elif args.test is not None:
-            agent.load_policy(args.test)
             agent.test(
                 n_test_episodes=args.n_test_episodes,
                 max_steps=args.max_steps,

     args = parser.parse_args()
     print(vars(args))
+    agent = load_agent(
+        args.agent if args.test is None else args.test, **dict(args._get_kwargs())
+    )
     agent.run_name += f"_e{args.n_train_episodes}_s{args.max_steps}"
     if args.wandb_run_name_suffix is not None:
             if not args.no_save:
                 agent.save_policy(save_dir=args.save_dir)
         elif args.test is not None:
             agent.test(
                 n_test_episodes=args.n_test_episodes,
                 max_steps=args.max_steps,