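"""Train, evaluate, and explain a DDPG agent on Gymnasium's continuous
LunarLander environment. Captum's IntegratedGradients attributes the
actor's action choices to the 8-dimensional observation features."""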
from ddpg import Agent

import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
import torch
import argparse

from captum.attr import IntegratedGradients
class TrainingLoop:
    def __init__(self):
        pass

    def train(self):
        env = gym.make(
            "LunarLander-v2",
            continuous=True,
            gravity=-10.0,
            render_mode=None,
        )
        # The continuous LunarLander action space is 2-dimensional
        # (main engine, lateral engines), so the actor needs n_actions=2.
        agent = Agent(
            alpha=0.000025,
            beta=0.00025,
            input_dims=[8],
            tau=0.001,
            env=env,
            batch_size=64,
            layer1_size=400,
            layer2_size=300,
            n_actions=2,
        )
        agent.load_models()  # resume from the latest checkpoint
        np.random.seed(0)

        score_history = []
        for i in range(1000):
            done = False
            score = 0
            obs, _ = env.reset()
            while not done:
                act = agent.choose_action(obs)
                new_state, reward, terminated, truncated, info = env.step(act)
                done = terminated or truncated
                agent.remember(obs, act, reward, new_state, int(done))
                agent.learn()
                score += reward
                obs = new_state
            score_history.append(score)
            print(f"episode {i} score {score:.2f} "
                  f"100 game average {np.mean(score_history[-100:]):.2f}")
            if i % 25 == 0:
                agent.save_models()
    def load_trained(self):
        env = gym.make(
            "LunarLanderContinuous-v2",
            render_mode="human",
        )
        agent = Agent(
            alpha=0.000025,
            beta=0.00025,
            input_dims=[8],
            tau=0.001,
            env=env,
            batch_size=64,
            layer1_size=400,
            layer2_size=300,
            n_actions=2,
        )
        agent.load_models()
        np.random.seed(0)

        score_history = []
        for i in range(50):
            done = False
            score = 0
            obs, _ = env.reset()
            while not done:
                act = agent.choose_action(obs)
                new_state, reward, terminated, truncated, info = env.step(act)
                done = terminated or truncated
                score += reward
                obs = new_state
            score_history.append(score)
            print(f"episode {i} score {score:.2f} "
                  f"100 game average {np.mean(score_history[-100:]):.2f}")
    # Model Explainability

    def _collect_running_baseline_average(self, num_iterations: int) -> torch.Tensor:
        env = gym.make(
            "LunarLanderContinuous-v2",
            render_mode=None,
        )
        agent = Agent(
            alpha=0.000025,
            beta=0.00025,
            input_dims=[8],
            tau=0.001,
            env=env,
            batch_size=64,
            layer1_size=400,
            layer2_size=300,
            n_actions=2,
        )
        agent.load_models()
        torch.manual_seed(0)

        # Average the initial observation across rollouts to build an
        # environment-specific baseline for IntegratedGradients.
        sum_obs = torch.zeros(8)
        for i in range(num_iterations):
            done = False
            obs, _ = env.reset()
            sum_obs += torch.as_tensor(obs, dtype=torch.float32)
            print(f"Baseline on iteration #{i}: {obs}")
            while not done:
                act = agent.choose_action(obs, attribution=None, baseline=None)
                new_state, reward, terminated, truncated, info = env.step(act)
                done = terminated or truncated
                obs = new_state
        return sum_obs / num_iterations
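    # A minimal sketch (an assumption about ddpg.Agent's internals, not its
    # confirmed API) of how choose_action might compute attributions when an
    # IntegratedGradients instance and a baseline are passed in:
    #
    #     state = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0)
    #     # target selects which action dimension to attribute against
    #     attributions = ig.attribute(state, baselines=baseline.unsqueeze(0), target=0)
    #
    # ig.attribute(inputs, baselines=..., target=...) is Captum's standard
    # entry point; the surrounding details are hypothetical.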
    def explain_trained(self, option: str, num_iterations: int = 10) -> None:
        # Baseline choice for IntegratedGradients: an all-zero state, or a
        # running average of initial observations. Callables keep the
        # rollout-based baseline from being collected unless selected.
        baseline_options = {
            "1": lambda: torch.zeros(8),
            "2": lambda: self._collect_running_baseline_average(num_iterations),
        }
        baseline = baseline_options[option]()

        env = gym.make(
            "LunarLanderContinuous-v2",
            render_mode="human",
        )
        agent = Agent(
            alpha=0.000025,
            beta=0.00025,
            input_dims=[8],
            tau=0.001,
            env=env,
            batch_size=64,
            layer1_size=400,
            layer2_size=300,
            n_actions=2,
        )
        agent.load_models()
        ig = IntegratedGradients(agent.actor)
        np.random.seed(0)

        score_history = []
        for i in range(50):
            done = False
            score = 0
            obs, _ = env.reset()
            while not done:
                act = agent.choose_action(obs, attribution=ig, baseline=baseline)
                new_state, reward, terminated, truncated, info = env.step(act)
                done = terminated or truncated
                score += reward
                obs = new_state
            score_history.append(score)
            print(f"episode {i} score {score:.2f} "
                  f"100 game average {np.mean(score_history[-100:]):.2f}")