ledmands committed on
Commit
ebb75df
1 Parent(s): 980c356

Added video recording, plotting, and viewing script templates.

agents/version_2/plot_evaluations.py ADDED
@@ -0,0 +1,37 @@
+ from numpy import load
+ import matplotlib.pyplot as plt
+
+ data = load("evaluations.npz")
+ lst = data.files  # data.files lists the keys stored in the archive
+
+ # results and ep_lengths are 2D arrays because each evaluation is 5 episodes long.
+ # Plot the average of each evaluation.
+
+ # For each evaluation in results, average the episode rewards.
+ avg_ep_result_arr = []
+ for evaluation in data['results']:
+     result_sum = 0
+     for result in evaluation:
+         result_sum = result_sum + result
+     avg_ep_result_arr.append(result_sum / len(evaluation))
+
+ # For each evaluation in ep_lengths, average the episode lengths and
+ # track the longest episode overall to size the y-axis.
+ avg_ep_len_arr = []
+ max_len = 0
+ for evaluation in data['ep_lengths']:
+     ep_len_sum = 0
+     for ep_length in evaluation:
+         ep_len_sum = ep_len_sum + ep_length
+         if ep_length > max_len:
+             max_len = ep_length
+     avg_ep_len_arr.append(ep_len_sum / len(evaluation))
+
+ # Leave 1% of headroom above the longest episode.
+ y_limit = max_len * 1.01
+
+ plt.plot(data['timesteps'], avg_ep_result_arr)
+ plt.bar(data['timesteps'], avg_ep_len_arr, width=10000)
+ plt.ylim(top=y_limit)
+
+ plt.show()
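Since results and ep_lengths are plain 2-D arrays, the per-evaluation averaging above can also be written without explicit loops. A minimal sketch, not part of the commit, assuming evaluations.npz holds the same timesteps/results/ep_lengths keys:

    import numpy as np

    data = np.load("evaluations.npz")
    # Averaging over axis=1 collapses the 5 episodes of each evaluation.
    avg_results = data["results"].mean(axis=1)
    avg_lengths = data["ep_lengths"].mean(axis=1)
    y_limit = data["ep_lengths"].max() * 1.01  # same 1% headroom as the script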
agents/version_2/record_video.py ADDED
@@ -0,0 +1,22 @@
+ import gymnasium as gym
+ from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv
+
+ env_id = "ALE/Pacman-v5"
+ video_folder = "./"
+ video_length = 100  # steps
+
+ vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])
+
+ # Record the video starting at the first step. The "/" in the env id is
+ # replaced so the name prefix is a valid filename.
+ vec_env = VecVideoRecorder(vec_env, video_folder,
+                            record_video_trigger=lambda x: x == 0,
+                            video_length=video_length,
+                            name_prefix=env_id.replace("/", "-"))
+
+ vec_env.reset()
+ for _ in range(video_length + 1):
+     action = [vec_env.action_space.sample()]
+     obs, _, _, _ = vec_env.step(action)
+ # Close the recorder to flush and save the video
+ vec_env.close()
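VecVideoRecorder's record_video_trigger callback receives the current step count and returns True when a recording should start; the lambda above fires only at step 0. A sketch of a variant (the 2000-step interval is illustrative, not from the commit) that records a clip periodically during a longer rollout:

    vec_env = VecVideoRecorder(vec_env, video_folder,
                               record_video_trigger=lambda step: step % 2000 == 0,
                               video_length=video_length,
                               name_prefix=env_id.replace("/", "-"))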
agents/version_2/watch_agent.py ADDED
@@ -0,0 +1,20 @@
+ from stable_baselines3 import DQN
+ from stable_baselines3.common.evaluation import evaluate_policy
+ from stable_baselines3.common.monitor import Monitor
+ import gymnasium as gym
+
+ MODEL_NAME = "ALE-Pacman-v5-control"
+
+ # The saved model does not contain the replay buffer.
+ loaded_model = DQN.load(MODEL_NAME)
+ # print(f"The loaded_model has {loaded_model.replay_buffer.size()} transitions in its buffer")
+
+ # Retrieve the environment and wrap it in a Monitor so episode
+ # statistics are recorded for evaluate_policy.
+ eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="human"))
+
+ # Evaluate the policy
+ mean_reward, std_reward = evaluate_policy(loaded_model.policy, eval_env,
+                                           n_eval_episodes=10, deterministic=False)
+
+ print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")