ledmands committed
Commit ebb75df • Parent(s): 980c356
Added video recording, plotting, and viewing script templates.
agents/version_2/plot_evaluations.py
ADDED
@@ -0,0 +1,46 @@
from numpy import load
import matplotlib.pyplot as plt

data = load("evaluations.npz")
lst = data.files  # data.files lists the keys that are available in the archive
# print('ep_lengths: \n', data['ep_lengths'])

# results and ep_lengths are 2D arrays, because each evaluation is 5 episodes long.
# Plot the average of each evaluation.

# For each evaluation in results, average the episode returns.
avg_ep_result_arr = []
for evaluation in data['results']:
    result_sum = 0
    for result in evaluation:
        result_sum = result_sum + result
    avg_ep_result_arr.append(result_sum / len(evaluation))

# Do the same for episode lengths, tracking the longest episode seen so the
# plot's y-limit can be set with a little headroom above it.
avg_ep_len_arr = []
max_len = 0
for evaluation in data['ep_lengths']:
    ep_len_sum = 0
    for ep_length in evaluation:
        ep_len_sum = ep_len_sum + ep_length
        if ep_length > max_len:
            max_len = ep_length
    avg_ep_len_arr.append(ep_len_sum / len(evaluation))

y_limit = max_len * 1.01

plt.plot(data['timesteps'], avg_ep_result_arr)
plt.bar(data['timesteps'], avg_ep_len_arr, width=10000)
plt.ylim(top=y_limit)
plt.show()
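For context, evaluations.npz is the log file that Stable-Baselines3's EvalCallback writes during training. A minimal sketch of how such a file could be produced is below; the eval_freq, total_timesteps, and policy choice are illustrative assumptions, not values taken from this repo.

# Sketch: producing evaluations.npz with EvalCallback (assumed settings).
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback

train_env = gym.make("ALE/Pacman-v5")
eval_env = gym.make("ALE/Pacman-v5")

eval_callback = EvalCallback(
    eval_env,
    log_path="./",      # writes ./evaluations.npz with timesteps, results, ep_lengths
    eval_freq=10_000,   # assumed evaluation interval
    n_eval_episodes=5,  # matches the 5 episodes per evaluation read above
)

model = DQN("CnnPolicy", train_env)
model.learn(total_timesteps=100_000, callback=eval_callback)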
agents/version_2/record_video.py
ADDED
@@ -0,0 +1,22 @@
import gymnasium as gym
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

env_id = "ALE/Pacman-v5"
video_folder = "./"
video_length = 100  # steps

# ale-py must be installed for the ALE/* environment ids to be registered.
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# Record the video starting at the first step. The "/" in the env id is
# replaced so the recorder writes to a valid file name inside video_folder.
vec_env = VecVideoRecorder(vec_env, video_folder,
                           record_video_trigger=lambda x: x == 0,
                           video_length=video_length,
                           name_prefix=env_id.replace("/", "-"))

vec_env.reset()
for _ in range(video_length + 1):
    # Sample a random action; the vectorized env expects a batch of actions.
    action = [vec_env.action_space.sample()]
    obs, _, _, _ = vec_env.step(action)

# Save the video
vec_env.close()
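The loop above records a random agent. To record the trained agent instead, the random-action loop could be replaced with actions from the loaded model; a minimal sketch, assuming the model file name used in watch_agent.py below and that the model was trained on the same observations this environment returns (no extra wrappers).

# Sketch: record the trained agent instead of random actions (assumptions above).
from stable_baselines3 import DQN

model = DQN.load("ALE-Pacman-v5-control")  # file name taken from watch_agent.py

obs = vec_env.reset()
for _ in range(video_length + 1):
    action, _states = model.predict(obs, deterministic=False)
    obs, _, _, _ = vec_env.step(action)
vec_env.close()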
agents/version_2/watch_agent.py
ADDED
@@ -0,0 +1,22 @@
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import gymnasium as gym

MODEL_NAME = "ALE-Pacman-v5-control"

# The saved model does not contain the replay buffer.
loaded_model = DQN.load(MODEL_NAME)
# print(f"The loaded_model has {loaded_model.replay_buffer.size()} transitions in its buffer")

# Retrieve the environment; Monitor records episode rewards and lengths,
# and render_mode="human" opens a window so the agent can be watched.
eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="human"))

# Evaluate the policy
mean_reward, std_reward = evaluate_policy(loaded_model.policy, eval_env,
                                          n_eval_episodes=10, deterministic=False)

print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
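The commented-out print above reflects that DQN.load does not restore the replay buffer. If the buffer was saved separately during training, it can be re-attached before further training; a short sketch with a placeholder file name:

# Sketch: re-attach a separately saved replay buffer ("pacman_replay_buffer" is a placeholder).
# During training: model.save_replay_buffer("pacman_replay_buffer")
loaded_model.load_replay_buffer("pacman_replay_buffer")
print(f"The loaded_model has {loaded_model.replay_buffer.size()} transitions in its buffer")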