ledmands committed
Commit ebb75df • Parent(s): 980c356
Added video recording, plotting, and viewing script templates.
agents/version_2/plot_evaluations.py
ADDED
@@ -0,0 +1,46 @@
from numpy import load
import matplotlib.pyplot as plt

data = load("evaluations.npz")
lst = data.files  # data.files lists the keys that are available in the archive
# print('ep_lengths: \n', data['ep_lengths'])

# results and ep_lengths are 2D arrays, because each evaluation is 5 episodes long.
# Plot the average of each evaluation.

# For each evaluation in results, average the episode returns.
avg_ep_result_arr = []
for evaluation in data['results']:
    result_sum = 0
    for result in evaluation:
        result_sum = result_sum + result
    avg_ep_result_arr.append(result_sum / len(evaluation))

# Do the same for episode lengths, tracking the longest episode seen so the
# plot's y-limit can be set with a little headroom above it.
avg_ep_len_arr = []
max_len = 0
for evaluation in data['ep_lengths']:
    ep_len_sum = 0
    for ep_length in evaluation:
        ep_len_sum = ep_len_sum + ep_length
        if ep_length > max_len:
            max_len = ep_length
    avg_ep_len_arr.append(ep_len_sum / len(evaluation))

y_limit = max_len * 1.01

plt.plot(data['timesteps'], avg_ep_result_arr)
plt.bar(data['timesteps'], avg_ep_len_arr, width=10000)
plt.ylim(top=y_limit)
plt.show()
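For context, evaluations.npz is the log file that Stable-Baselines3's EvalCallback writes during training. A minimal sketch of how such a file could be produced is below; the eval_freq, total_timesteps, and policy choice are illustrative assumptions, not values taken from this repo.

# Sketch: producing evaluations.npz with EvalCallback (assumed settings).
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback

train_env = gym.make("ALE/Pacman-v5")
eval_env = gym.make("ALE/Pacman-v5")

eval_callback = EvalCallback(
    eval_env,
    log_path="./",      # writes ./evaluations.npz with timesteps, results, ep_lengths
    eval_freq=10_000,   # assumed evaluation interval
    n_eval_episodes=5,  # matches the 5 episodes per evaluation read above
)

model = DQN("CnnPolicy", train_env)
model.learn(total_timesteps=100_000, callback=eval_callback)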
agents/version_2/record_video.py
ADDED
@@ -0,0 +1,22 @@
import gymnasium as gym
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

env_id = "ALE/Pacman-v5"
video_folder = "./"
video_length = 100  # steps

# ale-py must be installed for the ALE/* environment ids to be registered.
vec_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# Record the video starting at the first step. The "/" in the env id is
# replaced so the recorder writes to a valid file name inside video_folder.
vec_env = VecVideoRecorder(vec_env, video_folder,
                           record_video_trigger=lambda x: x == 0,
                           video_length=video_length,
                           name_prefix=env_id.replace("/", "-"))

vec_env.reset()
for _ in range(video_length + 1):
    # Sample a random action; the vectorized env expects a batch of actions.
    action = [vec_env.action_space.sample()]
    obs, _, _, _ = vec_env.step(action)

# Save the video
vec_env.close()
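The loop above records a random agent. To record the trained agent instead, the random-action loop could be replaced with actions from the loaded model; a minimal sketch, assuming the model file name used in watch_agent.py below and that the model was trained on the same observations this environment returns (no extra wrappers).

# Sketch: record the trained agent instead of random actions (assumptions above).
from stable_baselines3 import DQN

model = DQN.load("ALE-Pacman-v5-control")  # file name taken from watch_agent.py

obs = vec_env.reset()
for _ in range(video_length + 1):
    action, _states = model.predict(obs, deterministic=False)
    obs, _, _, _ = vec_env.step(action)
vec_env.close()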
agents/version_2/watch_agent.py
ADDED
@@ -0,0 +1,22 @@
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
import gymnasium as gym

MODEL_NAME = "ALE-Pacman-v5-control"

# The saved model does not contain the replay buffer.
loaded_model = DQN.load(MODEL_NAME)
# print(f"The loaded_model has {loaded_model.replay_buffer.size()} transitions in its buffer")

# Retrieve the environment; Monitor records episode rewards and lengths,
# and render_mode="human" opens a window so the agent can be watched.
eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="human"))

# Evaluate the policy
mean_reward, std_reward = evaluate_policy(loaded_model.policy, eval_env,
                                          n_eval_episodes=10, deterministic=False)

print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
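The commented-out print above reflects that DQN.load does not restore the replay buffer. If the buffer was saved separately during training, it can be re-attached before further training; a short sketch with a placeholder file name:

# Sketch: re-attach a separately saved replay buffer ("pacman_replay_buffer" is a placeholder).
# During training: model.save_replay_buffer("pacman_replay_buffer")
loaded_model.load_replay_buffer("pacman_replay_buffer")
print(f"The loaded_model has {loaded_model.replay_buffer.size()} transitions in its buffer")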