ledmands committed on
Commit
ca16748
·
1 Parent(s): baeb5f8

Moved the config and watch scripts to the root directory. Split the watch script into two scripts: watch and evaluate.

Browse files
agents/dqn_v2-6/evals.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ -----
2
+ Evaluation of agents/dqn_v2-6/ALE-Pacman-v5 on 16 May 2024 at 11:24:26 AM
3
+ Episodes evaluated: 1
4
+ mean_rwd: 399.0
5
+ std_rwd: 0.0
6
+
evaluate_agent.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from stable_baselines3 import DQN
2
+ from stable_baselines3.common.evaluation import evaluate_policy
3
+ from stable_baselines3.common.monitor import Monitor
4
+ import gymnasium as gym
5
+
6
+ import argparse
7
+ from datetime import datetime
8
+
9
+
10
+ # This script should have some options
11
+ # 1. Turn off the stochasticity as determined by the ALEv5
12
+ # Even if deterministic=True is passed to evaluate_policy, the environment repeats the previous action (ignoring the agent's choice) with probability 0.25 by default
13
+ # To compensate for this, we can set the repeat action probability to 0
14
+ # DONE
15
+ # 2. Print out the evaluation metrics or save to file
16
+ # DONE
17
+ # 4. Print the keyword args for the environment? I think this might be helpful...
18
+ # DONE (ish), printing the environment specifications.
19
+ # 5. Add option flag to accept file path for model
20
+ # DONE
21
+ # 6. Add option flag to accept number of episodes
22
+ # DONE
23
+ # 7. Save evaluations in a log file
24
+ # DONE
25
+ # 8. Add option flag for mean rewards/length or discrete rewards/lengths
26
+ # IN PROGRESS
27
+
28
+ parser = argparse.ArgumentParser()
29
+ parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability, default 0.25", type=float, default=0.25)
30
+ parser.add_argument("-f", "--frameskip", help="frameskip, default 4", type=int, default=4)
31
+ # parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
32
+ parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
33
+ parser.add_argument("-e", "--num_episodes", help="specify the number of episodes to evaluate, default 1", type=int, default=1)
34
+ parser.add_argument("-a", "--agent_filepath", help="file path to agent to watch, minus the .zip extension", type=str, required=True)
35
+ # parser.add_argument("-s", "--savefile", help="Specify a filepath to save the evaluation metrics.", type=str, default="evals")
36
+ args = parser.parse_args()
37
+
38
+ model_name = args.agent_filepath
39
+ model = DQN.load(model_name)
40
+ # TODO: validate the input path here; the split below assumes directories are separated by forward slashes
41
+ dirs = model_name.split("/")
42
+ # remove the last item, as it is the zip file
43
+ dirs.pop()
44
+ model_dir = "/".join(dirs)
45
+ print(type(model_dir))
46
+ print(model_dir)
47
+
48
+ # Retrieve the environment
49
+ eval_env = Monitor(gym.make("ALE/Pacman-v5",
50
+ render_mode="rgb_array",
51
+ repeat_action_probability=args.repeat_action_probability,
52
+ frameskip=args.frameskip))
53
+
54
+ if args.print == True:
55
+ env_info = str(eval_env.spec).split(", ")
56
+ for item in env_info:
57
+ print(item)
58
+ # Evaluate the policy
59
+ # Toggle the mean or discrete evaluations here
60
+ mean_rwd, std_rwd = evaluate_policy(model.policy, eval_env, n_eval_episodes=args.num_episodes)
61
+
62
+ # savefile = args.savefile
63
+ savefile = model_dir + "/evals"
64
+ date = datetime.now().strftime("%d %b %Y")
65
+ time = datetime.now().strftime("%I:%M:%S %p")
66
+
67
+ with open(f"{savefile}.txt", "a") as file:
68
+ file.write("-----\n")
69
+ file.write(f"Evaluation of {model_name} on {date} at {time}\n")
70
+ file.write(f"Episodes evaluated: {args.num_episodes}\n")
71
+ file.write(f"mean_rwd: {mean_rwd}\n")
72
+ file.write(f"std_rwd: {std_rwd}\n\n")
73
+
agents/watch_agent.py → watch_agent.py RENAMED
@@ -24,7 +24,7 @@ import argparse
24
  parser = argparse.ArgumentParser()
25
  parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability, default 0.25", type=float, default=0.25)
26
  parser.add_argument("-f", "--frameskip", help="frameskip, default 4", type=int, default=4)
27
- parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
28
  parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
29
  parser.add_argument("-e", "--num_episodes", help="specify the number of episodes to evaluate, default 1", type=int, default=1)
30
  parser.add_argument("-a", "--agent_filepath", help="file path to agent to watch, minus the .zip extension", type=str, required=True)
@@ -34,14 +34,14 @@ MODEL_NAME = args.agent_filepath
34
  loaded_model = DQN.load(MODEL_NAME)
35
 
36
  # Toggle the render mode based on the -o flag
37
- if args.observe == True:
38
- mode = "human"
39
- else:
40
- mode = "rgb_array"
41
 
42
  # Retrieve the environment
43
  eval_env = Monitor(gym.make("ALE/Pacman-v5",
44
- render_mode=mode,
45
  repeat_action_probability=args.repeat_action_probability,
46
  frameskip=args.frameskip,))
47
 
@@ -50,7 +50,9 @@ if args.print == True:
50
  for item in env_info:
51
  print(item)
52
  # Evaluate the policy
53
- mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=args.num_episodes)
54
- print("eval episodes: ", args.num_episodes)
55
- print("mean rwd: ", mean_rwd)
56
- print("std rwd: ", std_rwd)
 
 
 
24
  parser = argparse.ArgumentParser()
25
  parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability, default 0.25", type=float, default=0.25)
26
  parser.add_argument("-f", "--frameskip", help="frameskip, default 4", type=int, default=4)
27
+ # parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
28
  parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
29
  parser.add_argument("-e", "--num_episodes", help="specify the number of episodes to evaluate, default 1", type=int, default=1)
30
  parser.add_argument("-a", "--agent_filepath", help="file path to agent to watch, minus the .zip extension", type=str, required=True)
 
34
  loaded_model = DQN.load(MODEL_NAME)
35
 
36
  # Toggle the render mode based on the -o flag
37
+ # if args.observe == True:
38
+ # mode = "human"
39
+ # else:
40
+ # mode = "rgb_array"
41
 
42
  # Retrieve the environment
43
  eval_env = Monitor(gym.make("ALE/Pacman-v5",
44
+ render_mode="human",
45
  repeat_action_probability=args.repeat_action_probability,
46
  frameskip=args.frameskip,))
47
 
 
50
  for item in env_info:
51
  print(item)
52
  # Evaluate the policy
53
+ # mean_rwd, std_rwd =
54
+
55
+ evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=args.num_episodes)
56
+ # print("eval episodes: ", args.num_episodes)
57
+ # print("mean rwd: ", mean_rwd)
58
+ # print("std rwd: ", std_rwd)