File size: 3,182 Bytes
ebb75df c37ff18 ca16748 ebb75df cbe0575 c37ff18 cbe0575 ca16748 a30d4ce 1427095 a30d4ce 1427095 ca16748 c37ff18 a30d4ce ca16748 cbe0575 a30d4ce ca16748 c37ff18 ebb75df ca16748 ebb75df cbe0575 ca16748 cbe0575 ca16748 ebb75df cbe0575 ebb75df ca16748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import argparse
import os
from datetime import datetime

import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
# This script should have some options
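# 1. Allow turning off the stochasticity ("sticky actions") that ALE v5 environments apply by default
# Even if deterministic is set to True in evaluate_policy, the environment repeats the previous action
# instead of the agent's chosen one 25% of the time (repeat_action_probability=0.25)
# To compensate for this, pass -r 0 to set the repeat action probability to 0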
# DONE
# 2. Print out the evaluation metrics or save to file
# DONE
# 4. Print the keyword args for the environment? I think this might be helpful...
# DONE (ish), printing the environment specifications.
# 5. Add option flag to accept file path for model
# DONE
# 6. Add option flag to accept number of episodes
# DONE
# 7. Save evaluations in a log file
# DONE
# 8. Add option flag for mean rewards/length or discrete rewards/lengths
# IN PROGRESS
# Command-line interface of the evaluation script.
parser = argparse.ArgumentParser(
    description="Evaluate a saved DQN agent on ALE/Pacman-v5 and append the results to a log file."
)
parser.add_argument(
    "-r",
    "--repeat_action_probability",
    help="repeat action probability, default 0.25",
    type=float,
    default=0.25,
)
parser.add_argument(
    "-f",
    "--frameskip",
    help="frameskip, default 4",
    type=int,
    default=4,
)
# Disabled option, kept for reference:
# parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
# store_true gives a real boolean (False when the flag is absent) instead of None.
parser.add_argument(
    "-p",
    "--print",
    help="print environment information",
    action="store_true",
)
parser.add_argument(
    "-e",
    "--num_episodes",
    help="specify the number of episodes to evaluate, default 1",
    type=int,
    default=1,
)
parser.add_argument(
    "-a",
    "--agent_filepath",
    help="file path to agent to watch, minus the .zip extension",
    type=str,
    required=True,
)
# Disabled option, kept for reference:
# parser.add_argument("-s", "--savefile", help="Specify a filepath to save the evaluation metrics.", type=str, default="evals")
args = parser.parse_args()

# Reject unusable values up front, before the model is loaded or the
# environment is created; parser.error prints the usage text and exits.
if args.num_episodes < 1:
    parser.error("--num_episodes must be at least 1")
if not 0.0 <= args.repeat_action_probability <= 1.0:
    parser.error("--repeat_action_probability must be between 0 and 1")
if args.frameskip < 1:
    parser.error("--frameskip must be at least 1")
# Load the trained agent from the path given with -a (per the help text,
# the path is supplied without the ".zip" extension).
model_name = args.agent_filepath
model = DQN.load(model_name)

# Directory containing the model file; the evaluation log is later written
# to `model_dir + "/evals"`. os.path.dirname returns "" for a bare file name,
# which would make that path point at the filesystem root, so fall back to
# the current directory in that case.
model_dir = os.path.dirname(model_name) or "."
print(model_dir)
# Build the Pacman environment with the stochasticity and frameskip chosen
# on the command line, then wrap it in Monitor for evaluation.
pacman_env = gym.make(
    "ALE/Pacman-v5",
    render_mode="rgb_array",
    repeat_action_probability=args.repeat_action_probability,
    frameskip=args.frameskip,
)
eval_env = Monitor(pacman_env)

# With -p, dump the environment spec, one ", "-separated field per line.
if args.print:
    for spec_field in str(eval_env.spec).split(", "):
        print(spec_field)
# Evaluate the policy
# Toggle the mean or discrete evaluations here
try:
    mean_rwd, std_rwd = evaluate_policy(
        model.policy,
        eval_env,
        n_eval_episodes=args.num_episodes,
    )
finally:
    # Release the environment even if the evaluation fails part-way through.
    eval_env.close()

# savefile = args.savefile
savefile = model_dir + "/evals"

# Take a single timestamp so the logged date and time describe the same instant.
now = datetime.now()
date = now.strftime("%d %b %Y")
time_of_day = now.strftime("%I:%M:%S %p")

# Append one record per run; earlier evaluations in the log are kept.
report = (
    "-----\n"
    f"Evaluation of {model_name} on {date} at {time_of_day}\n"
    f"Episodes evaluated: {args.num_episodes}\n"
    f"mean_rwd: {mean_rwd}\n"
    f"std_rwd: {std_rwd}\n\n"
)
with open(f"{savefile}.txt", "a", encoding="utf-8") as file:
    file.write(report)
|