"""Evaluate a saved DQN agent on ALE/Pacman-v5 and print its reward statistics.

The script loads the model stored under ``MODEL_NAME``, plays a fixed number
of rendered evaluation episodes, and prints the mean and standard deviation
of the episode rewards.
"""

import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

MODEL_NAME = "ALE-Pacman-v5-control"

# The saved model does not contain the replay buffer. That is fine here:
# this script only evaluates the policy and never resumes training.
loaded_model = DQN.load(MODEL_NAME)

# Retrieve the environment. It is wrapped in Monitor before being handed to
# evaluate_policy; render_mode="human" opens a window so the episodes can be
# watched while they are played.
eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="human"))

# Evaluate the policy.
# NOTE(review): evaluation is driven through `loaded_model.policy` rather than
# `loaded_model` itself; confirm that `deterministic=False` has the intended
# effect on this code path.
try:
    mean_reward, std_reward = evaluate_policy(
        loaded_model.policy,
        eval_env,
        n_eval_episodes=10,
        deterministic=False,
    )
finally:
    # Always release the render window / emulator, even if evaluation fails.
    eval_env.close()

print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")