"""Evaluate a saved DQN agent on the ALE/Pacman-v5 environment.

The script loads the model named by ``MODEL_NAME``, builds a monitored
evaluation environment configured from the command line, runs a single
evaluation episode and prints the mean and standard deviation of the reward.
"""

import argparse
from typing import Optional, Sequence

import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

# Name of the saved model handed to DQN.load.
MODEL_NAME = "ALE-Pacman-v5"

# Planned options for this script:
# 1. Turn off the stochasticity introduced by the ALE v5 environments. Even
#    when evaluate_policy is asked to act deterministically, the environment
#    ignores the chosen action 25% of the time by default. Passing
#    --repeat_action_probability 0 compensates for this. (DONE)
# 2. Print out the evaluation metrics or save them to a file.
# 3. Render in the ALE or not. (DONE)
# 4. Print the keyword arguments of the environment. (IN PROGRESS)


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``repeat_action_probability`` (float, default 0.25),
        ``frameskip`` (int, default 4), ``observe`` (bool) and ``print``
        (bool).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r",
        "--repeat_action_probability",
        help="repeat action probability",
        type=float,
        default=0.25,
    )
    parser.add_argument(
        "-f",
        "--frameskip",
        help="frameskip",
        type=int,
        default=4,
    )
    # store_true yields a real boolean (False when the flag is absent)
    # instead of the None/True pair produced by store_const.
    parser.add_argument(
        "-o",
        "--observe",
        help="observe agent",
        action="store_true",
    )
    parser.add_argument(
        "-p",
        "--print",
        help="print environment information",
        action="store_true",
    )
    return parser.parse_args(argv)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run one evaluation episode and print the reward statistics.

    Args:
        argv: Argument list forwarded to :func:`parse_args`.
    """
    # Parse first so that --help and invalid flags exit before the model is
    # loaded from disk.
    args = parse_args(argv)

    loaded_model = DQN.load(MODEL_NAME)

    # Toggle the render mode based on the -o flag.
    mode = "human" if args.observe else "rgb_array"

    # Retrieve the environment.
    eval_env = Monitor(
        gym.make(
            "ALE/Pacman-v5",
            render_mode=mode,
            repeat_action_probability=args.repeat_action_probability,
            frameskip=args.frameskip,
        )
    )

    try:
        if args.print:
            # The spec's string form is a comma-separated field list; print
            # one field per line.
            for item in str(eval_env.spec).split(", "):
                print(item)

        # Evaluate the policy.
        mean_rwd, std_rwd = evaluate_policy(
            loaded_model.policy, eval_env, n_eval_episodes=1
        )
    finally:
        # Always release the environment (and any render window), even when
        # evaluation fails.
        eval_env.close()

    print("mean rwd: ", mean_rwd)
    print("std rwd: ", std_rwd)


if __name__ == "__main__":
    main()