diff --git a/DDQN_FNN.py b/DDQN_FNN.py
new file mode 100644
index 0000000000000000000000000000000000000000..161638e9d1227b25920b83189ac1ee7f4e43b393
--- /dev/null
+++ b/DDQN_FNN.py
@@ -0,0 +1,103 @@
+import numpy as np
+from collections import deque
+from tensorflow import keras
+import random
+
+
+class DoubleDeepQNetwork:
+    def __init__(self, states, actions, history, alpha, gamma, epsilon, epsilon_min, epsilon_decay):
+        self.nS = states
+        self.nA = actions
+        self.history = history
+        self.memory = deque([], maxlen=2500)
+        self.alpha = alpha
+        self.gamma = gamma
+        # Explore/Exploit
+        self.epsilon = epsilon
+        self.epsilon_min = epsilon_min
+        self.epsilon_decay = epsilon_decay
+        self.model = self.build_model()
+        self.model_target = self.build_model()  # Second (target) neural network
+        self.update_target_from_model()  # Update weights
+        self.loss = []
+
+    def build_model(self):
+        model = keras.Sequential()  # linear stack of layers https://keras.io/models/sequential/
+        model.add(keras.layers.Dense(24, input_dim=self.history * self.nS, activation='relu'))  # [Input] -> Layer 1
+        # Dense: Densely connected layer https://keras.io/layers/core/
+        # 24: Number of neurons
+        # input_dim: Number of input variables
+        # activation: Rectified Linear Unit (relu) ranges >= 0
+        model.add(keras.layers.Dense(24, activation='relu'))  # Layer 1 -> Layer 2
+        model.add(keras.layers.Dense(self.nA, activation='linear'))  # Layer 2 -> [output]
+        # Output size has to match the number of actions
+        # Linear activation on the last layer
+        model.compile(loss='mean_squared_error',  # Loss function: Mean Squared Error
+                      optimizer=keras.optimizers.Adam(
+                          learning_rate=self.alpha))  # Optimizer: Adam (feel free to try other options)
+        return model
+
+    def update_target_from_model(self):
+        # Update the target model from the base model
+        self.model_target.set_weights(self.model.get_weights())
+
+    def action(self, state):
+        if np.random.rand() <= self.epsilon:
+            return random.randrange(self.nA)  # Explore
+        action_vals = self.model.predict(state)  # Exploit: use the NN to predict the best action from this state
+        return np.argmax(action_vals[0])
+
+    def test_action(self, state):  # Exploit only
+        action_vals = self.model.predict(state)
+        return np.argmax(action_vals[0])
+
+    def store(self, state, action, reward, next_state, done):
+        # Store the experience in memory
+        self.memory.append((state, action, reward, next_state, done))
+
+    def save_model(self, agentName):
+        # Save the agent model weights in a file
+        self.model.save(agentName)
+
+    def experience_replay(self, batch_size):
+        # Execute the experience replay
+        minibatch = random.sample(self.memory, batch_size)  # Randomly sample from memory
+
+        # Convert to numpy for speed by vectorization
+        x = []
+        y = []
+        np_array = np.array(minibatch, dtype=object)  # object dtype: the tuple fields have different shapes
+        st = np.zeros((0, self.history * self.nS))  # States
+        nst = np.zeros((0, self.history * self.nS))  # Next States
+        for i in range(len(np_array)):  # Creating the state and next state np arrays
+            st = np.append(st, np_array[i, 0], axis=0)
+            nst = np.append(nst, np_array[i, 3], axis=0)
+        st_predict = self.model.predict(st)  # Here is the speedup! I can predict on the ENTIRE batch
+        nst_predict = self.model.predict(nst)
+        nst_predict_target = self.model_target.predict(nst)  # Predict from the TARGET network
+        index = 0
+        for state, action, reward, next_state, done in minibatch:
+            x.append(state)
+            # Predictions for this transition's next state
+            nst_action_predict_target = nst_predict_target[index]
+            nst_action_predict_model = nst_predict[index]
+            if done:  # Terminal state: the target is just the reward
+                target = reward
+            else:  # Non-terminal
+                target = reward + self.gamma * nst_action_predict_target[
+                    np.argmax(nst_action_predict_model)]  # Double DQN: online net picks the action, target net evaluates it
+            target_f = st_predict[index]
+            target_f[action] = target
+            y.append(target_f)
+            index += 1
+        # Reshape for Keras fit
+        x_reshape = np.array(x).reshape(batch_size, self.history * self.nS)
+        y_reshape = np.array(y)
+        epoch_count = 1
+        hist = self.model.fit(x_reshape, y_reshape, epochs=epoch_count, verbose=0)
+        # Track losses for plotting
+        for i in range(epoch_count):
+            self.loss.append(hist.history['loss'][i])
+        # Decay epsilon
+        if self.epsilon > self.epsilon_min:
+            self.epsilon *= self.epsilon_decay
diff --git a/agents/Base_Agent.py b/agents/Base_Agent.py
deleted file mode 100644
index 3a3e143e8307d6b4e497ce3990147971ab966e72..0000000000000000000000000000000000000000
--- a/agents/Base_Agent.py
+++ /dev/null
@@ -1,394 +0,0 @@
-import logging
-import os
-import sys
-import gym
-import random
-import numpy as np
-import torch
-import time
-# import tensorflow as tf
-from nn_builder.pytorch.NN import NN
-# from tensorboardX import SummaryWriter
-from torch.optim import optimizer
-
-
-class Base_Agent(object):
-
-    def __init__(self, config):
-        self.logger = self.setup_logger()
-        self.debug_mode = config.debug_mode
-        # if self.debug_mode: self.tensorboard = SummaryWriter()
-        self.config = config
-        self.set_random_seeds(config.seed)
-        self.environment = config.environment
-        self.environment_title = self.get_environment_title()
-        self.action_types = "DISCRETE" if self.environment.action_space.dtype == np.int64 else "CONTINUOUS"
-        self.action_size = int(self.get_action_size())
-        self.config.action_size = self.action_size
-
-        self.lowest_possible_episode_score = self.get_lowest_possible_episode_score()
-
-        self.state_size = int(self.get_state_size())
-        self.hyperparameters = config.hyperparameters
-        self.average_score_required_to_win = self.get_score_required_to_win()
-        self.rolling_score_window = self.get_trials()
-        # self.max_steps_per_episode = self.environment.spec.max_episode_steps
-        self.total_episode_score_so_far = 0
-        self.game_full_episode_scores = []
-        self.game_full_episode_signals = []
-        self.rolling_results = []
-        self.max_rolling_score_seen = float("-inf")
-        self.max_episode_score_seen = float("-inf")
-        self.episode_number = 0
-        self.device = "cuda:0" if config.use_GPU else "cpu"
-        self.visualise_results_boolean = config.visualise_individual_results
-        self.global_step_number = 0
-        self.turn_off_exploration = False if config.training else True
-        gym.logger.set_level(40)  # stops it from printing an unnecessary warning
-        self.log_game_info()
-
-    def step(self):
-        """Takes a step in the game.
This method must be overriden by any agent""" - raise ValueError("Step needs to be implemented by the agent") - - def get_environment_title(self): - """Extracts name of environment from it""" - try: - name = self.environment.unwrapped.id - except AttributeError: - try: - if str(self.environment.unwrapped)[1:11] == "FetchReach": - return "FetchReach" - elif str(self.environment.unwrapped)[1:8] == "AntMaze": - return "AntMaze" - elif str(self.environment.unwrapped)[1:7] == "Hopper": - return "Hopper" - elif str(self.environment.unwrapped)[1:9] == "Walker2d": - return "Walker2d" - else: - name = self.environment.spec.id.split("-")[0] - except AttributeError: - name = str(self.environment.env) - if name[0:10] == "TimeLimit<": name = name[10:] - name = name.split(" ")[0] - if name[0] == "<": name = name[1:] - if name[-3:] == "Env": name = name[:-3] - return name - - def get_lowest_possible_episode_score(self): - """Returns the lowest possible episode score you can get in an environment""" - if self.environment_title == "Taxi": return -800 - return None - - def get_action_size(self): - """Gets the action_size for the gym env into the correct shape for a neural network""" - if "overwrite_action_size" in self.config.__dict__: return self.config.overwrite_action_size - if "action_size" in self.environment.__dict__: return self.environment.action_size - if self.action_types == "DISCRETE": - return self.environment.action_space.n - else: - return self.environment.action_space.shape[0] - - def get_state_size(self): - """Gets the state_size for the gym env into the correct shape for a neural network""" - random_state = self.environment.reset() - if isinstance(random_state, dict): - state_size = random_state["observation"].shape[0] + random_state["desired_goal"].shape[0] - return state_size - else: - return random_state.size - - def get_score_required_to_win(self): - """Gets average score required to win game""" - print("TITLE ", self.environment_title) - if self.environment_title == "FetchReach": return -5 - if self.environment_title in ["AntMaze", "Hopper", "Walker2d"]: - print("Score required to win set to infinity therefore no learning rate annealing will happen") - return float("inf") - try: - return self.environment.unwrapped.reward_threshold - except AttributeError: - try: - return self.environment.spec.reward_threshold - except AttributeError: - return self.environment.unwrapped.spec.reward_threshold - - def get_trials(self): - """Gets the number of trials to average a score over""" - if self.environment_title in ["AntMaze", "FetchReach", "Hopper", "Walker2d", "CartPole"]: return 100 - try: - return self.environment.unwrapped.trials - except AttributeError: - return self.environment.spec.trials - - def setup_logger(self): - """Sets up the logger""" - filename = "Training.log" - try: - if os.path.isfile(filename): - os.remove(filename) - except: - pass - - logger = logging.getLogger(__name__) - logger.setLevel(logging.INFO) - # create a file handler - handler = logging.FileHandler(filename) - handler.setLevel(logging.INFO) - # create a logging format - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - # add the handlers to the logger - logger.addHandler(handler) - return logger - - def log_game_info(self): - """Logs info relating to the game""" - for ix, param in enumerate( - [self.environment_title, self.action_types, self.action_size, self.lowest_possible_episode_score, - self.state_size, self.hyperparameters, 
self.average_score_required_to_win, self.rolling_score_window, - self.device]): - self.logger.info("{} -- {}".format(ix, param)) - - def set_random_seeds(self, random_seed): - """Sets all possible random seeds so results can be reproduced""" - os.environ['PYTHONHASHSEED'] = str(random_seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - torch.manual_seed(random_seed) - # tf.set_random_seed(random_seed) - random.seed(random_seed) - np.random.seed(random_seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(random_seed) - torch.cuda.manual_seed(random_seed) - if hasattr(gym.spaces, 'prng'): - gym.spaces.prng.seed(random_seed) - - def reset_game(self): - """Resets the game information so we are ready to play a new episode""" - self.environment.seed(self.config.seed) - self.state = self.environment.reset() - self.next_state = None - self.action = None - self.reward = None - self.signal = None - self.done = False - self.total_episode_score_so_far = 0 - self.total_episode_signal_so_far = 0 - self.episode_states = [] - self.episode_rewards = [] - self.episode_signals = [] - self.episode_actions = [] - self.episode_next_states = [] - self.episode_dones = [] - self.episode_desired_goals = [] - self.episode_achieved_goals = [] - self.episode_observations = [] - if "exploration_strategy" in self.__dict__.keys(): self.exploration_strategy.reset() - self.logger.info("Reseting game -- New start state {}".format(self.state)) - - def track_episodes_data(self): - """Saves the data from the recent episodes""" - self.episode_states.append(self.state) - self.episode_actions.append(self.action) - self.episode_rewards.append(self.reward) - self.episode_signals.append(self.signal) - self.episode_next_states.append(self.next_state) - self.episode_dones.append(self.done) - - def run_n_episodes(self, num_episodes=None, show_whether_achieved_goal=True, save_and_print_results=True): - """Runs game to completion n times and then summarises results and saves model (if asked to)""" - if num_episodes is None: num_episodes = self.config.num_episodes_to_run - start = time.time() - while self.episode_number < num_episodes: - self.reset_game() - self.step() - if save_and_print_results: self.save_and_print_result() - time_taken = time.time() - start - if show_whether_achieved_goal: self.show_whether_achieved_goal() - if self.config.save_model: self.locally_save_policy() - return self.game_full_episode_scores, self.rolling_results, time_taken, self.game_full_episode_signals - - def conduct_action(self, action): - """Conducts an action in the environment""" - self.next_state, self.reward, self.done, self.signal = self.environment.step(action) - self.total_episode_score_so_far += self.reward - self.total_episode_signal_so_far += self.signal - if self.hyperparameters["clip_rewards"]: self.reward = max(min(self.reward, 1.0), -1.0) - - def save_and_print_result(self): - """Saves and prints results of the game""" - self.save_result() - self.print_rolling_result() - - def save_result(self): - """Saves the result of an episode of the game""" - self.game_full_episode_scores.append(self.total_episode_score_so_far) - self.game_full_episode_signals.append(self.total_episode_signal_so_far) - self.rolling_results.append(np.mean(self.game_full_episode_scores[-1 * self.rolling_score_window:])) - self.save_max_result_seen() - - def save_max_result_seen(self): - """Updates the best episode result seen so far""" - if self.game_full_episode_scores[-1] > self.max_episode_score_seen: - 
self.max_episode_score_seen = self.game_full_episode_scores[-1] - - if self.rolling_results[-1] > self.max_rolling_score_seen: - if len(self.rolling_results) > self.rolling_score_window: - self.max_rolling_score_seen = self.rolling_results[-1] - - def print_rolling_result(self): - """Prints out the latest episode results""" - text = """"\r Episode {0}, Score: {3: .2f}, Max score seen: {4: .2f}, Rolling score: {1: .2f}, Max rolling score seen: {2: .2f}""" - sys.stdout.write( - text.format(len(self.game_full_episode_scores), self.rolling_results[-1], self.max_rolling_score_seen, - self.game_full_episode_scores[-1], self.max_episode_score_seen)) - sys.stdout.flush() - - def show_whether_achieved_goal(self): - """Prints out whether the agent achieved the environment target goal""" - index_achieved_goal = self.achieved_required_score_at_index() - print(" ") - if index_achieved_goal == -1: # this means agent never achieved goal - print("\033[91m" + "\033[1m" + - "{} did not achieve required score \n".format(self.agent_name) + - "\033[0m" + "\033[0m") - else: - print("\033[92m" + "\033[1m" + - "{} achieved required score at episode {} \n".format(self.agent_name, index_achieved_goal) + - "\033[0m" + "\033[0m") - - def achieved_required_score_at_index(self): - """Returns the episode at which agent achieved goal or -1 if it never achieved it""" - for ix, score in enumerate(self.rolling_results): - if score > self.average_score_required_to_win: - return ix - return -1 - - def update_learning_rate(self, starting_lr, optimizer): - """Lowers the learning rate according to how close we are to the solution""" - if len(self.rolling_results) > 0: - last_rolling_score = self.rolling_results[-1] - if last_rolling_score > 0.75 * self.average_score_required_to_win: - new_lr = starting_lr / 100.0 - elif last_rolling_score > 0.6 * self.average_score_required_to_win: - new_lr = starting_lr / 20.0 - elif last_rolling_score > 0.5 * self.average_score_required_to_win: - new_lr = starting_lr / 10.0 - elif last_rolling_score > 0.25 * self.average_score_required_to_win: - new_lr = starting_lr / 2.0 - else: - new_lr = starting_lr - for g in optimizer.param_groups: - g['lr'] = new_lr - if random.random() < 0.001: self.logger.info("Learning rate {}".format(new_lr)) - - def enough_experiences_to_learn_from(self): - """Boolean indicated whether there are enough experiences in the memory buffer to learn from""" - return len(self.memory) > self.hyperparameters["batch_size"] - - def save_experience(self, memory=None, experience=None): - """Saves the recent experience to the memory buffer""" - if memory is None: memory = self.memory - if experience is None: experience = self.state, self.action, self.reward, self.next_state, self.done - memory.add_experience(*experience) - - def take_optimisation_step(self, optimizer, network, loss, clipping_norm=None, retain_graph=False): - """Takes an optimisation step by calculating gradients given the loss and then updating the parameters""" - if not isinstance(network, list): network = [network] - optimizer.zero_grad() # reset gradients to 0 - loss.backward(retain_graph=retain_graph) # this calculates the gradients - self.logger.info("Loss -- {}".format(loss.item())) - if self.debug_mode: self.log_gradient_and_weight_information(network, optimizer) - if clipping_norm is not None: - for net in network: - torch.nn.utils.clip_grad_norm_(net.parameters(), - clipping_norm) # clip gradients to help stabilise training - optimizer.step() # this applies the gradients - - def 
log_gradient_and_weight_information(self, network, optimizer): - - # log weight information - total_norm = 0 - for name, param in network.named_parameters(): - param_norm = param.grad.data.norm(2) - total_norm += param_norm.item() ** 2 - total_norm = total_norm ** (1. / 2) - self.logger.info("Gradient Norm {}".format(total_norm)) - - for g in optimizer.param_groups: - learning_rate = g['lr'] - break - self.logger.info("Learning Rate {}".format(learning_rate)) - - def soft_update_of_target_network(self, local_model, target_model, tau): - """Updates the target network in the direction of the local network but by taking a step size - less than one so the target network's parameter values trail the local networks. This helps stabilise training""" - for target_param, local_param in zip(target_model.parameters(), local_model.parameters()): - target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data) - - def create_NN(self, input_dim, output_dim, key_to_use=None, override_seed=None, hyperparameters=None): - """Creates a neural network for the agents to use""" - if hyperparameters is None: hyperparameters = self.hyperparameters - if key_to_use: hyperparameters = hyperparameters[key_to_use] - if override_seed: - seed = override_seed - else: - seed = self.config.seed - - default_hyperparameter_choices = {"output_activation": None, "hidden_activations": "relu", "dropout": 0.0, - "initialiser": "default", "batch_norm": False, - "columns_of_data_to_be_embedded": [], - "embedding_dimensions": [], "y_range": ()} - - for key in default_hyperparameter_choices: - if key not in hyperparameters.keys(): - hyperparameters[key] = default_hyperparameter_choices[key] - - return NN(input_dim=input_dim, layers_info=hyperparameters["linear_hidden_units"] + [output_dim], - output_activation=hyperparameters["final_layer_activation"], - batch_norm=hyperparameters["batch_norm"], dropout=hyperparameters["dropout"], - hidden_activations=hyperparameters["hidden_activations"], initialiser=hyperparameters["initialiser"], - columns_of_data_to_be_embedded=hyperparameters["columns_of_data_to_be_embedded"], - embedding_dimensions=hyperparameters["embedding_dimensions"], y_range=hyperparameters["y_range"], - random_seed=seed).to(self.device) - - def turn_on_any_epsilon_greedy_exploration(self): - """Turns off all exploration with respect to the epsilon greedy exploration strategy""" - print("Turning on epsilon greedy exploration") - self.turn_off_exploration = False - - def turn_off_any_epsilon_greedy_exploration(self): - """Turns off all exploration with respect to the epsilon greedy exploration strategy""" - print("Turning off epsilon greedy exploration") - self.turn_off_exploration = True - - def freeze_all_but_output_layers(self, network): - """Freezes all layers except the output layer of a network""" - print("Freezing hidden layers") - for param in network.named_parameters(): - param_name = param[0] - assert "hidden" in param_name or "output" in param_name or "embedding" in param_name, "Name {} of network layers not understood".format( - param_name) - if "output" not in param_name: - param[1].requires_grad = False - - def unfreeze_all_layers(self, network): - """Unfreezes all layers of a network""" - print("Unfreezing all layers") - for param in network.parameters(): - param.requires_grad = True - - @staticmethod - def move_gradients_one_model_to_another(from_model, to_model, set_from_gradients_to_zero=False): - """Copies gradients from from_model to to_model""" - for from_model, to_model in 
zip(from_model.parameters(), to_model.parameters()): - to_model._grad = from_model.grad.clone() - if set_from_gradients_to_zero: from_model._grad = None - - @staticmethod - def copy_model_over(from_model, to_model): - """Copies model parameters from from_model to to_model""" - for to_model, from_model in zip(to_model.parameters(), from_model.parameters()): - to_model.data.copy_(from_model.data.clone()) diff --git a/agents/DQN_agents/DDQN.py b/agents/DQN_agents/DDQN.py deleted file mode 100644 index a0c36e58d72e8e5a53071ce313e9eba28d0bb517..0000000000000000000000000000000000000000 --- a/agents/DQN_agents/DDQN.py +++ /dev/null @@ -1,18 +0,0 @@ -from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets - -class DDQN(DQN_With_Fixed_Q_Targets): - """A double DQN agent""" - agent_name = "DDQN" - - def __init__(self, config): - DQN_With_Fixed_Q_Targets.__init__(self, config) - - def compute_q_values_for_next_states(self, next_states): - """Computes the q_values for next state we will use to create the loss to train the Q network. Double DQN - uses the local index to pick the maximum q_value action and then the target network to calculate the q_value. - The reasoning behind this is that it will help stop the network from overestimating q values""" - max_action_indexes = self.q_network_local(next_states).detach().argmax(1) - Q_targets_next = self.q_network_target(next_states).gather(1, max_action_indexes.unsqueeze(1)) - return Q_targets_next - - diff --git a/agents/DQN_agents/DDQN_With_Prioritised_Experience_Replay.py b/agents/DQN_agents/DDQN_With_Prioritised_Experience_Replay.py deleted file mode 100644 index ccdd6d627c6e74d8956b41693186b13692772568..0000000000000000000000000000000000000000 --- a/agents/DQN_agents/DDQN_With_Prioritised_Experience_Replay.py +++ /dev/null @@ -1,37 +0,0 @@ -import torch -import torch.nn.functional as F -from agents.DQN_agents.DDQN import DDQN -from utilities.data_structures.Prioritised_Replay_Buffer import Prioritised_Replay_Buffer - -class DDQN_With_Prioritised_Experience_Replay(DDQN): - """A DQN agent with prioritised experience replay""" - agent_name = "DDQN with Prioritised Replay" - - def __init__(self, config): - DDQN.__init__(self, config) - self.memory = Prioritised_Replay_Buffer(self.hyperparameters, config.seed) - - def learn(self): - """Runs a learning iteration for the Q network after sampling from the replay buffer in a prioritised way""" - sampled_experiences, importance_sampling_weights = self.memory.sample() - states, actions, rewards, next_states, dones = sampled_experiences - loss, td_errors = self.compute_loss_and_td_errors(states, next_states, rewards, actions, dones, importance_sampling_weights) - self.take_optimisation_step(self.q_network_optimizer, self.q_network_local, loss, self.hyperparameters["gradient_clipping_norm"]) - self.soft_update_of_target_network(self.q_network_local, self.q_network_target, self.hyperparameters["tau"]) - self.memory.update_td_errors(td_errors.squeeze(1)) - - def save_experience(self): - """Saves the latest experience including the td_error""" - max_td_error_in_experiences = self.memory.give_max_td_error() + 1e-9 - self.memory.add_experience(max_td_error_in_experiences, self.state, self.action, self.reward, self.next_state, self.done) - - def compute_loss_and_td_errors(self, states, next_states, rewards, actions, dones, importance_sampling_weights): - """Calculates the loss for the local Q network. 
It weighs each observations loss according to the importance - sampling weights which come from the prioritised replay buffer""" - Q_targets = self.compute_q_targets(next_states, rewards, dones) - Q_expected = self.compute_expected_q_values(states, actions) - loss = F.mse_loss(Q_expected, Q_targets) - loss = loss * importance_sampling_weights - loss = torch.mean(loss) - td_errors = Q_targets.data.cpu().numpy() - Q_expected.data.cpu().numpy() - return loss, td_errors \ No newline at end of file diff --git a/agents/DQN_agents/DQN.py b/agents/DQN_agents/DQN.py deleted file mode 100644 index 42a14f4a53913581709848b8fbcff7f154543752..0000000000000000000000000000000000000000 --- a/agents/DQN_agents/DQN.py +++ /dev/null @@ -1,135 +0,0 @@ -from collections import Counter - -import torch -import random -import torch.optim as optim -import torch.nn.functional as F -import numpy as np -from agents.Base_Agent import Base_Agent -from exploration_strategies.Epsilon_Greedy_Exploration import Epsilon_Greedy_Exploration -from utilities.data_structures.Replay_Buffer import Replay_Buffer - - -class DQN(Base_Agent): - """A deep Q learning agent""" - agent_name = "DQN" - - def __init__(self, config): - Base_Agent.__init__(self, config) - self.memory = Replay_Buffer(self.hyperparameters["buffer_size"], self.hyperparameters["batch_size"], - config.seed, self.device) - self.q_network_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size) - self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(), - lr=self.hyperparameters["learning_rate"], eps=1e-4) - self.exploration_strategy = Epsilon_Greedy_Exploration(config) - - def reset_game(self): - super(DQN, self).reset_game() - self.update_learning_rate(self.hyperparameters["learning_rate"], self.q_network_optimizer) - - def step(self): - """Runs a step within a game including a learning step if required""" - while not self.done: - self.action = self.pick_action() - self.conduct_action(self.action) - # If we are in training mode - if self.config.training: - if self.time_for_q_network_to_learn(): - for _ in range(self.hyperparameters["learning_iterations"]): - self.learn() - self.save_experience() - self.state = self.next_state # this is to set the state for the next iteration - self.global_step_number += 1 - self.episode_number += 1 - - def pick_action(self, state=None): - """Uses the local Q network and an epsilon greedy policy to pick an action""" - # PyTorch only accepts mini-batches and not single observations so we have to use unsqueeze to add - # a "fake" dimension to make it a mini-batch rather than a single observation - if state is None: state = self.state - if isinstance(state, np.int64) or isinstance(state, int): state = np.array([state]) - state = torch.from_numpy(state).float().unsqueeze(0).to(self.device) - if len(state.shape) < 2: state = state.unsqueeze(0) - if not self.config.training: - self.q_network_local = self.locally_load_policy() - self.q_network_local.eval() # puts network in evaluation mode - with torch.no_grad(): - action_values = self.q_network_local(state) - if self.config.training: - self.q_network_local.train() # puts network back in training mode - action = self.exploration_strategy.perturb_action_for_exploration_purposes({"action_values": action_values, - "turn_off_exploration": self.turn_off_exploration, - "episode_number": self.episode_number}) - self.logger.info("Q values {} -- Action chosen {}".format(action_values, action)) - return action - - def learn(self, experiences=None): - """Runs a 
learning iteration for the Q network""" - if experiences is None: - states, actions, rewards, next_states, dones = self.sample_experiences() # Sample experiences - else: - states, actions, rewards, next_states, dones = experiences - loss = self.compute_loss(states, next_states, rewards, actions, dones) - - actions_list = [action_X.item() for action_X in actions] - - self.logger.info("Action counts {}".format(Counter(actions_list))) - self.take_optimisation_step(self.q_network_optimizer, self.q_network_local, loss, - self.hyperparameters["gradient_clipping_norm"]) - - def compute_loss(self, states, next_states, rewards, actions, dones): - """Computes the loss required to train the Q network""" - with torch.no_grad(): - Q_targets = self.compute_q_targets(next_states, rewards, dones) - Q_expected = self.compute_expected_q_values(states, actions) - loss = F.mse_loss(Q_expected, Q_targets) - return loss - - def compute_q_targets(self, next_states, rewards, dones): - """Computes the q_targets we will compare to predicted q values to create the loss to train the Q network""" - Q_targets_next = self.compute_q_values_for_next_states(next_states) - Q_targets = self.compute_q_values_for_current_states(rewards, Q_targets_next, dones) - return Q_targets - - def compute_q_values_for_next_states(self, next_states): - """Computes the q_values for next state we will use to create the loss to train the Q network""" - Q_targets_next = self.q_network_local(next_states).detach().max(1)[0].unsqueeze(1) - return Q_targets_next - - def compute_q_values_for_current_states(self, rewards, Q_targets_next, dones): - """Computes the q_values for current state we will use to create the loss to train the Q network""" - Q_targets_current = rewards + (self.hyperparameters["discount_rate"] * Q_targets_next * (1 - dones)) - return Q_targets_current - - def compute_expected_q_values(self, states, actions): - """Computes the expected q_values we will use to create the loss to train the Q network""" - Q_expected = self.q_network_local(states).gather(1, - actions.long()) # must convert actions to long so can be used as index - return Q_expected - - def locally_save_policy(self): - """Saves the policy""" - torch.save(self.q_network_local.state_dict(), - "{}/{}_network.pt".format(self.config.models_dir, self.agent_name)) - - def locally_load_policy(self): - """loads the policy""" - filename = f'{self.config.models_dir}/{self.agent_name}_network.pt' - saved_q_network_local = self.q_network_local - saved_q_network_local.load_state_dict(torch.load(filename)) - return saved_q_network_local - - def time_for_q_network_to_learn(self): - """Returns boolean indicating whether enough steps have been taken for learning to begin and there are - enough experiences in the replay buffer to learn from""" - return self.right_amount_of_steps_taken() and self.enough_experiences_to_learn_from() - - def right_amount_of_steps_taken(self): - """Returns boolean indicating whether enough steps have been taken for learning to begin""" - return self.global_step_number % self.hyperparameters["update_every_n_steps"] == 0 - - def sample_experiences(self): - """Draws a random sample of experience from the memory buffer""" - experiences = self.memory.sample() - states, actions, rewards, next_states, dones = experiences - return states, actions, rewards, next_states, dones diff --git a/agents/DQN_agents/DQN_HER.py b/agents/DQN_agents/DQN_HER.py deleted file mode 100644 index 9110a385d9f9da61216d8a853a8af12749d57cee..0000000000000000000000000000000000000000 --- 
a/agents/DQN_agents/DQN_HER.py +++ /dev/null @@ -1,30 +0,0 @@ -from agents.DQN_agents.DQN import DQN -from agents.HER_Base import HER_Base - -class DQN_HER(HER_Base, DQN): - """DQN algorithm with hindsight experience replay""" - agent_name = "DQN-HER" - def __init__(self, config): - DQN.__init__(self, config) - HER_Base.__init__(self, self.hyperparameters["buffer_size"], self.hyperparameters["batch_size"], - self.hyperparameters["HER_sample_proportion"]) - - def step(self): - """Runs a step within a game including a learning step if required""" - while not self.done: - self.action = self.pick_action() - self.conduct_action_in_changeable_goal_envs(self.action) - if self.time_for_q_network_to_learn(): - for _ in range(self.hyperparameters["learning_iterations"]): - self.learn(experiences=self.sample_from_HER_and_Ordinary_Buffer()) - self.track_changeable_goal_episodes_data() - self.save_experience() - if self.done: self.save_alternative_experience() - self.state_dict = self.next_state_dict # this is to set the state for the next iteration - self.state = self.next_state - self.global_step_number += 1 - self.episode_number += 1 - - def enough_experiences_to_learn_from(self): - """Returns booleans indicating whether there are enough experiences in the two replay buffers to learn from""" - return len(self.memory) > self.ordinary_buffer_batch_size and len(self.HER_memory) > self.HER_buffer_batch_size \ No newline at end of file diff --git a/agents/DQN_agents/DQN_With_Fixed_Q_Targets.py b/agents/DQN_agents/DQN_With_Fixed_Q_Targets.py deleted file mode 100644 index 7cdd492189aab9473ccaae6fe6aca21c8a848a0c..0000000000000000000000000000000000000000 --- a/agents/DQN_agents/DQN_With_Fixed_Q_Targets.py +++ /dev/null @@ -1,23 +0,0 @@ -import copy - -from agents.Base_Agent import Base_Agent -from agents.DQN_agents.DQN import DQN - -class DQN_With_Fixed_Q_Targets(DQN): - """A DQN agent that uses an older version of the q_network as the target network""" - agent_name = "DQN with Fixed Q Targets" - def __init__(self, config): - DQN.__init__(self, config) - self.q_network_target = self.create_NN(input_dim=self.state_size, output_dim=self.action_size) - Base_Agent.copy_model_over(from_model=self.q_network_local, to_model=self.q_network_target) - - def learn(self, experiences=None): - """Runs a learning iteration for the Q network""" - super(DQN_With_Fixed_Q_Targets, self).learn(experiences=experiences) - self.soft_update_of_target_network(self.q_network_local, self.q_network_target, - self.hyperparameters["tau"]) # Update the target network - - def compute_q_values_for_next_states(self, next_states): - """Computes the q_values for next state we will use to create the loss to train the Q network""" - Q_targets_next = self.q_network_target(next_states).detach().max(1)[0].unsqueeze(1) - return Q_targets_next \ No newline at end of file diff --git a/agents/DQN_agents/Dueling_DDQN.py b/agents/DQN_agents/Dueling_DDQN.py deleted file mode 100644 index 793ccba92570de0be1ea6d27831638145d3bf8b9..0000000000000000000000000000000000000000 --- a/agents/DQN_agents/Dueling_DDQN.py +++ /dev/null @@ -1,64 +0,0 @@ -import torch -from torch import optim -from agents.Base_Agent import Base_Agent -from agents.DQN_agents.DDQN import DDQN - -class Dueling_DDQN(DDQN): - """A dueling double DQN agent as described in the paper http://proceedings.mlr.press/v48/wangf16.pdf""" - agent_name = "Dueling DDQN" - - def __init__(self, config): - DDQN.__init__(self, config) - self.q_network_local = self.create_NN(input_dim=self.state_size, 
output_dim=self.action_size + 1) - self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4) - self.q_network_target = self.create_NN(input_dim=self.state_size, output_dim=self.action_size + 1) - Base_Agent.copy_model_over(from_model=self.q_network_local, to_model=self.q_network_target) - - def pick_action(self, state=None): - """Uses the local Q network and an epsilon greedy policy to pick an action""" - # PyTorch only accepts mini-batches and not single observations so we have to use unsqueeze to add - # a "fake" dimension to make it a mini-batch rather than a single observation - if state is None: state = self.state - state = torch.from_numpy(state).float().unsqueeze(0).to(self.device) - if len(state.shape) < 2: state = state.unsqueeze(0) - self.q_network_local.eval() - with torch.no_grad(): - action_values = self.q_network_local(state) - action_values = action_values[:, :-1] #because we treat the last output element as state-value and rest as advantages - self.q_network_local.train() - action = self.exploration_strategy.perturb_action_for_exploration_purposes({"action_values": action_values, - "turn_off_exploration": self.turn_off_exploration, - "episode_number": self.episode_number}) - return action - - def compute_q_values_for_next_states(self, next_states): - """Computes the q_values for next state we will use to create the loss to train the Q network. Double DQN - uses the local index to pick the maximum q_value action and then the target network to calculate the q_value. - The reasoning behind this is that it will help stop the network from overestimating q values""" - max_action_indexes = self.q_network_local(next_states)[:, :-1].detach().argmax(1) - duelling_network_output = self.q_network_target(next_states) - q_values = self.calculate_duelling_q_values(duelling_network_output) - Q_targets_next = q_values.gather(1, max_action_indexes.unsqueeze(1)) - return Q_targets_next - - def calculate_duelling_q_values(self, duelling_q_network_output): - """Calculates the q_values using the duelling network architecture. 
This is equation (9) in the paper - referenced at the top of the class""" - state_value = duelling_q_network_output[:, -1] - avg_advantage = torch.mean(duelling_q_network_output[:, :-1], dim=1) - q_values = state_value.unsqueeze(1) + (duelling_q_network_output[:, :-1] - avg_advantage.unsqueeze(1)) - return q_values - - def compute_expected_q_values(self, states, actions): - """Computes the expected q_values we will use to create the loss to train the Q network""" - duelling_network_output = self.q_network_local(states) - q_values = self.calculate_duelling_q_values(duelling_network_output) - Q_expected = q_values.gather(1, actions.long()) - return Q_expected - - - - - - - diff --git a/agents/DQN_agents/__init__.py b/agents/DQN_agents/__init__.py deleted file mode 100644 index c7df41cea8f9e72cc7e67b84e34436626f9b5875..0000000000000000000000000000000000000000 --- a/agents/DQN_agents/__init__.py +++ /dev/null @@ -1 +0,0 @@ -import os, sys; sys.path.append(os.path.dirname(os.path.realpath(__file__))) \ No newline at end of file diff --git a/agents/DQN_agents/__pycache__/DDQN.cpython-310.pyc b/agents/DQN_agents/__pycache__/DDQN.cpython-310.pyc deleted file mode 100644 index c6f9ed9115b85f14fa5a4276216ecbe574b3a894..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DDQN.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DDQN.cpython-38.pyc b/agents/DQN_agents/__pycache__/DDQN.cpython-38.pyc deleted file mode 100644 index ee18712cc33c41df248277bbc0d82b9d1dd43468..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DDQN.cpython-38.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DDQN.cpython-39.pyc b/agents/DQN_agents/__pycache__/DDQN.cpython-39.pyc deleted file mode 100644 index 82fc5b3144db3043a942fa053f58edff0970aa0e..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DDQN.cpython-39.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DDQN_With_Prioritised_Experience_Replay.cpython-310.pyc b/agents/DQN_agents/__pycache__/DDQN_With_Prioritised_Experience_Replay.cpython-310.pyc deleted file mode 100644 index d9d151469bb24d1c762d1f5d08b49817f295428c..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DDQN_With_Prioritised_Experience_Replay.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DDQN_With_Prioritised_Experience_Replay.cpython-39.pyc b/agents/DQN_agents/__pycache__/DDQN_With_Prioritised_Experience_Replay.cpython-39.pyc deleted file mode 100644 index c1919f0b6c6ed2d271e57516f86ceb3573148642..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DDQN_With_Prioritised_Experience_Replay.cpython-39.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN.cpython-310.pyc b/agents/DQN_agents/__pycache__/DQN.cpython-310.pyc deleted file mode 100644 index 4dec91f1c1eb1d3102b538f4aedb1100ce0d9db5..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN.cpython-39.pyc b/agents/DQN_agents/__pycache__/DQN.cpython-39.pyc deleted file mode 100644 index d3d00fa11df9efe5c1b022aa417ad28e91f18341..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN.cpython-39.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN_HER.cpython-310.pyc 
b/agents/DQN_agents/__pycache__/DQN_HER.cpython-310.pyc deleted file mode 100644 index 300b859fd6eb005b9bbcfab0b213e0ed9fbac2c8..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN_HER.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN_HER.cpython-39.pyc b/agents/DQN_agents/__pycache__/DQN_HER.cpython-39.pyc deleted file mode 100644 index 356b2570c72409ff52cec8f6129f7faf54ce490b..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN_HER.cpython-39.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-310.pyc b/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-310.pyc deleted file mode 100644 index 79bfe6f815a8db64db78a51f8a7631dd3e2dc02b..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-38.pyc b/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-38.pyc deleted file mode 100644 index 925f9d836942ebc5976a5ed7d45cbea099674ff0..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-38.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-39.pyc b/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-39.pyc deleted file mode 100644 index c7261847d33c284b8077e86e8140a2489cc1df93..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/DQN_With_Fixed_Q_Targets.cpython-39.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/Dueling_DDQN.cpython-310.pyc b/agents/DQN_agents/__pycache__/Dueling_DDQN.cpython-310.pyc deleted file mode 100644 index 856c7eab40e1df46bd5fef6d7628c1e0c1dd722a..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/Dueling_DDQN.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/Dueling_DDQN.cpython-39.pyc b/agents/DQN_agents/__pycache__/Dueling_DDQN.cpython-39.pyc deleted file mode 100644 index be5f0bdfdad29674fa27769e4fe127b02a0553c3..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/Dueling_DDQN.cpython-39.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/__init__.cpython-310.pyc b/agents/DQN_agents/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index fdc109ac604a86c895ed32ec4246c6848c6a0b6f..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/__init__.cpython-38.pyc b/agents/DQN_agents/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 18974bbde85c27bec71d0f514d9d9e5316720648..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/agents/DQN_agents/__pycache__/__init__.cpython-39.pyc b/agents/DQN_agents/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index cd6717db4458a69814c11ad8d8a26ac77e2c89f4..0000000000000000000000000000000000000000 Binary files a/agents/DQN_agents/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/agents/HER_Base.py b/agents/HER_Base.py deleted file mode 100644 index f7de66c2a7e61ac057af894f891b3ef8651ffb06..0000000000000000000000000000000000000000 --- 
a/agents/HER_Base.py +++ /dev/null @@ -1,100 +0,0 @@ -import torch -import numpy as np -from utilities.data_structures.Replay_Buffer import Replay_Buffer -from utilities.Utility_Functions import abstract - -@abstract -class HER_Base(object): - """Contains methods needed to turn an algorithm into a hindsight experience replay (HER) algorithm""" - def __init__(self, buffer_size, batch_size, HER_sample_proportion): - self.HER_memory = Replay_Buffer(buffer_size, batch_size, self.config.seed) - self.ordinary_buffer_batch_size = int(batch_size * (1.0 - HER_sample_proportion)) - self.HER_buffer_batch_size = batch_size - self.ordinary_buffer_batch_size - - def reset_game(self): - """Resets the game information so we are ready to play a new episode""" - self.state_dict = self.environment.reset() - self.observation = self.state_dict["observation"] - self.desired_goal = self.state_dict["desired_goal"] - self.achieved_goal = self.state_dict["achieved_goal"] - - self.state = self.create_state_from_observation_and_desired_goal(self.observation, self.desired_goal) - self.next_state = None - self.action = None - self.reward = None - self.done = False - - self.episode_states = [] - self.episode_rewards = [] - self.episode_actions = [] - self.episode_next_states = [] - self.episode_dones = [] - - self.episode_desired_goals = [] - self.episode_achieved_goals = [] - self.episode_observations = [] - - self.episode_next_desired_goals = [] - self.episode_next_achieved_goals = [] - self.episode_next_observations = [] - - self.total_episode_score_so_far = 0 - - def track_changeable_goal_episodes_data(self): - """Saves the data from the recent episodes in a way compatible with changeable goal environments""" - self.episode_rewards.append(self.reward) - self.episode_actions.append(self.action) - self.episode_dones.append(self.done) - - self.episode_states.append(self.state) - self.episode_next_states.append(self.next_state) - - self.episode_desired_goals.append(self.state_dict["desired_goal"]) - self.episode_achieved_goals.append(self.state_dict["achieved_goal"]) - self.episode_observations.append(self.state_dict["observation"]) - - self.episode_next_desired_goals.append(self.next_state_dict["desired_goal"]) - self.episode_next_achieved_goals.append(self.next_state_dict["achieved_goal"]) - self.episode_next_observations.append(self.next_state_dict["observation"]) - - def conduct_action_in_changeable_goal_envs(self, action): - """Adapts conduct_action from base agent so that can handle changeable goal environments""" - self.next_state_dict, self.reward, self.done, _ = self.environment.step(action) - self.total_episode_score_so_far += self.reward - if self.hyperparameters["clip_rewards"]: - self.reward = max(min(self.reward, 1.0), -1.0) - self.observation = self.next_state_dict["observation"] - self.desired_goal = self.next_state_dict["desired_goal"] - self.achieved_goal = self.next_state_dict["achieved_goal"] - self.next_state = self.create_state_from_observation_and_desired_goal(self.observation, self.desired_goal) - - - def create_state_from_observation_and_desired_goal(self, observation, desired_goal): - return np.concatenate((observation, desired_goal)) - - def save_alternative_experience(self): - """Saves the experiences as if the final state visited in the episode was the goal state""" - new_goal = self.achieved_goal - new_states = [self.create_state_from_observation_and_desired_goal(observation, new_goal) for observation in self.episode_observations] - new_next_states = 
[self.create_state_from_observation_and_desired_goal(observation, new_goal) for observation in - self.episode_next_observations] - new_rewards = [self.environment.compute_reward(next_achieved_goal, new_goal, None) for next_achieved_goal in self.episode_next_achieved_goals] - - if self.hyperparameters["clip_rewards"]: - new_rewards = [max(min(reward, 1.0), -1.0) for reward in new_rewards] - - self.HER_memory.add_experience(new_states, self.episode_actions, new_rewards, new_next_states, self.episode_dones) - - def sample_from_HER_and_Ordinary_Buffer(self): - """Samples from the ordinary replay buffer and HER replay buffer according to a proportion specified in config""" - states, actions, rewards, next_states, dones = self.memory.sample(self.ordinary_buffer_batch_size) - HER_states, HER_actions, HER_rewards, HER_next_states, HER_dones = self.HER_memory.sample(self.HER_buffer_batch_size) - - states = torch.cat((states, HER_states)) - actions = torch.cat((actions, HER_actions)) - rewards = torch.cat((rewards, HER_rewards)) - next_states = torch.cat((next_states, HER_next_states)) - dones = torch.cat((dones, HER_dones)) - return states, actions, rewards, next_states, dones - - diff --git a/agents/Trainer.py b/agents/Trainer.py deleted file mode 100644 index 56d7f252d2540ac385cb24130ea70f9ab8ae580e..0000000000000000000000000000000000000000 --- a/agents/Trainer.py +++ /dev/null @@ -1,304 +0,0 @@ -import copy -import random -import pickle -import os -import gym -from gym import wrappers -import numpy as np -import matplotlib.pyplot as plt - -class Trainer(object): - """Runs games for given agents. Optionally will visualise and save the results""" - def __init__(self, config, agents): - self.config = config - self.agents = agents - self.agent_to_agent_group = self.create_agent_to_agent_group_dictionary() - self.agent_to_color_group = self.create_agent_to_color_dictionary() - self.results = None - self.signals_result = None - self.colors = ["red", "blue", "green", "orange", "yellow", "purple"] - self.colour_ix = 0 - self.y_limits = None - - def create_agent_to_agent_group_dictionary(self): - """Creates a dictionary that maps an agent to their wider agent group""" - agent_to_agent_group_dictionary = { - "DQN": "DQN_Agents", - "DQN-HER": "DQN_Agents", - "DDQN": "DQN_Agents", - "DDQN with Prioritised Replay": "DQN_Agents", - "DQN with Fixed Q Targets": "DQN_Agents", - "Duelling DQN": "DQN_Agents", - "PPO": "Policy_Gradient_Agents", - "REINFORCE": "Policy_Gradient_Agents", - "Genetic_Agent": "Stochastic_Policy_Search_Agents", - "Hill Climbing": "Stochastic_Policy_Search_Agents", - "DDPG": "Actor_Critic_Agents", - "DDPG-HER": "Actor_Critic_Agents", - "TD3": "Actor_Critic_Agents", - "A2C": "Actor_Critic_Agents", - "A3C": "Actor_Critic_Agents", - "h-DQN": "h_DQN", - "SNN-HRL": "SNN_HRL", - "HIRO": "HIRO", - "SAC": "Actor_Critic_Agents", - "HRL": "HRL", - "Model_HRL": "HRL", - "DIAYN": "DIAYN", - "Dueling DDQN": "DQN_Agents" - } - return agent_to_agent_group_dictionary - - def create_agent_to_color_dictionary(self): - """Creates a dictionary that maps an agent to a hex color (for plotting purposes) - See https://en.wikipedia.org/wiki/Web_colors and https://htmlcolorcodes.com/ for hex colors""" - agent_to_color_dictionary = { - "DQN": "#0000FF", - "DQN with Fixed Q Targets": "#1F618D", - "DDQN": "#2980B9", - "DDQN with Prioritised Replay": "#7FB3D5", - "Dueling DDQN": "#22DAF3", - "PPO": "#5B2C6F", - "DDPG": "#800000", - "DQN-HER": "#008000", - "DDPG-HER": "#008000", - "TD3": "#E74C3C", - "h-DQN": "#D35400", 
- "SNN-HRL": "#800000", - "A3C": "#E74C3C", - "A2C": "#F1948A", - "SAC": "#1C2833", - "DIAYN": "#F322CD", - "HRL": "#0E0F0F" - } - return agent_to_color_dictionary - - def run_games_for_agents(self): - """Run a set of games for each agent. Optionally visualising and/or saving the results""" - self.results = self.create_object_to_store_results() - self.signals_result = self.create_object_to_store_results() - for agent_number, agent_class in enumerate(self.agents): - agent_name = agent_class.agent_name - self.run_games_for_agent(agent_number + 1, agent_class) - if self.config.visualise_overall_agent_results: - agent_rolling_score_results = [results[1] for results in self.results[agent_name]] - self.visualise_overall_agent_results(agent_rolling_score_results, agent_name, show_mean_and_std_range=True, y_limits=self.y_limits) - if self.config.file_to_save_data_results: self.save_obj(self.results, self.config.file_to_save_data_results) - if self.config.file_to_save_results_graph: plt.savefig(self.config.file_to_save_results_graph, bbox_inches="tight") - plt.show() - return self.results - - def create_object_to_store_results(self): - """Creates a dictionary that we will store the results in if it doesn't exist, otherwise it loads it up""" - if self.config.overwrite_existing_results_file or not self.config.file_to_save_data_results or not os.path.isfile(self.config.file_to_save_data_results): - results = {} - else: results = self.load_obj(self.config.file_to_save_data_results) - return results - - def run_games_for_agent(self, agent_number, agent_class): - """Runs a set of games for a given agent, saving the results in self.results""" - agent_results = [] - agent_name = agent_class.agent_name - agent_group = self.agent_to_agent_group[agent_name] - agent_round = 1 - for run in range(self.config.runs_per_agent): - agent_config = copy.deepcopy(self.config) - - if self.environment_has_changeable_goals(agent_config.environment) and self.agent_cant_handle_changeable_goals_without_flattening(agent_name): - print("Flattening changeable-goal environment for agent {}".format(agent_name)) - agent_config.environment = gym.wrappers.FlattenDictWrapper(agent_config.environment, - dict_keys=["observation", "desired_goal"]) - - if self.config.randomise_random_seed: agent_config.seed = random.randint(0, 2**32 - 2) - agent_config.hyperparameters = agent_config.hyperparameters[agent_group] - print("AGENT NAME: {}".format(agent_name)) - print("\033[1m" + "{}.{}: {}".format(agent_number, agent_round, agent_name) + "\033[0m", flush=True) - agent = agent_class(agent_config) - self.environment_name = agent.environment_title - print(agent.hyperparameters) - print("RANDOM SEED " , agent_config.seed) - game_scores, rolling_scores, time_taken, game_signals = agent.run_n_episodes() - print("Time taken: {}".format(time_taken), flush=True) - self.print_two_empty_lines() - agent_results.append([game_scores, rolling_scores, len(rolling_scores), -1 * max(rolling_scores), time_taken, game_signals]) - if self.config.visualise_individual_results: - self.visualise_overall_agent_results([rolling_scores], agent_name, show_each_run=True, y_limits=self.y_limits) - plt.show() - agent_round += 1 - self.results[agent_name] = agent_results - - def environment_has_changeable_goals(self, env): - """Determines whether environment is such that for each episode there is a different goal or not""" - return isinstance(env.reset(), dict) - - def agent_cant_handle_changeable_goals_without_flattening(self, agent_name): - """Boolean indicating whether 
the agent is set up to handle changeable goals""" - return "HER" not in agent_name - - def visualise_overall_agent_results(self, agent_results, agent_name, show_mean_and_std_range=False, show_each_run=False, - color=None, ax=None, title=None, y_limits=None): - """Visualises the results for one agent""" - assert isinstance(agent_results, list), "agent_results must be a list of lists, 1 set of results per list" - assert isinstance(agent_results[0], list), "agent_results must be a list of lists, 1 set of results per list" - assert bool(show_mean_and_std_range) ^ bool(show_each_run), "either show_mean_and_std_range or show_each_run must be true" - if not ax: ax = plt.gca() - if not color: color = self.agent_to_color_group[agent_name] - if show_mean_and_std_range: - mean_minus_x_std, mean_results, mean_plus_x_std = self.get_mean_and_standard_deviation_difference_results(agent_results) - x_vals = list(range(len(mean_results))) - ax.plot(x_vals, mean_results, label=agent_name, color=color) - ax.plot(x_vals, mean_plus_x_std, color=color, alpha=0.1) - ax.plot(x_vals, mean_minus_x_std, color=color, alpha=0.1) - ax.fill_between(x_vals, y1=mean_minus_x_std, y2=mean_plus_x_std, alpha=0.1, color=color) - else: - for ix, result in enumerate(agent_results): - x_vals = list(range(len(agent_results[0]))) - plt.plot(x_vals, result, label=agent_name + "_{}".format(ix+1), color=color) - color = self.get_next_color() - - ax.set_facecolor('xkcd:white') - - # Shrink current axis's height by 10% on the bottom - box = ax.get_position() - ax.set_position([box.x0, box.y0 + box.height * 0.05, - box.width, box.height * 0.95]) - - # Put a legend below current axis - ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), - fancybox=True, shadow=True, ncol=3) - - if not title: title = self.environment_name - - ax.set_title(title, fontsize=15, fontweight='bold') - ax.set_ylabel('Rolling Episode Scores') - ax.set_xlabel('Episode Number') - self.hide_spines(ax, ['right', 'top']) - ax.set_xlim([0, x_vals[-1]]) - - if y_limits is None: y_min, y_max = self.get_y_limits(agent_results) - else: y_min, y_max = y_limits - - ax.set_ylim([y_min, y_max]) - - if self.config.show_solution_score: - self.draw_horizontal_line_with_label(ax, y_value=self.config.environment.get_score_to_win(), x_min=0, - x_max=self.config.num_episodes_to_run * 1.02, label="Target \n score") - - def get_y_limits(self, results): - """Extracts the minimum and maximum seen y_values from a set of results""" - min_result = float("inf") - max_result = float("-inf") - for result in results: - temp_max = np.max(result) - temp_min = np.min(result) - if temp_max > max_result: - max_result = temp_max - if temp_min < min_result: - min_result = temp_min - return min_result, max_result - - def get_next_color(self): - """Gets the next color in list self.colors. 
If it gets to the end then it starts from beginning""" - self.colour_ix += 1 - if self.colour_ix >= len(self.colors): self.colour_ix = 0 - color = self.colors[self.colour_ix] - return color - - def get_mean_and_standard_deviation_difference_results(self, results): - """From a list of lists of agent results it extracts the mean results and the mean results plus or minus - some multiple of the standard deviation""" - def get_results_at_a_time_step(results, timestep): - results_at_a_time_step = [result[timestep] for result in results] - return results_at_a_time_step - def get_standard_deviation_at_time_step(results, timestep): - results_at_a_time_step = [result[timestep] for result in results] - return np.std(results_at_a_time_step) - mean_results = [np.mean(get_results_at_a_time_step(results, timestep)) for timestep in range(len(results[0]))] - mean_minus_x_std = [mean_val - self.config.standard_deviation_results * get_standard_deviation_at_time_step(results, timestep) for - timestep, mean_val in enumerate(mean_results)] - mean_plus_x_std = [mean_val + self.config.standard_deviation_results * get_standard_deviation_at_time_step(results, timestep) for - timestep, mean_val in enumerate(mean_results)] - return mean_minus_x_std, mean_results, mean_plus_x_std - - def hide_spines(self, ax, spines_to_hide): - """Hides splines on a matplotlib image""" - for spine in spines_to_hide: - ax.spines[spine].set_visible(False) - - def ignore_points_after_game_solved(self, mean_minus_x_std, mean_results, mean_plus_x_std): - """Removes the datapoints after the mean result achieves the score required to solve the game""" - for ix in range(len(mean_results)): - if mean_results[ix] >= self.config.environment.get_score_to_win(): - break - return mean_minus_x_std[:ix], mean_results[:ix], mean_plus_x_std[:ix] - - def draw_horizontal_line_with_label(self, ax, y_value, x_min, x_max, label): - """Draws a dotted horizontal line on the given image at the given point and with the given label""" - ax.hlines(y=y_value, xmin=x_min, xmax=x_max, - linewidth=2, color='k', linestyles='dotted', alpha=0.5) - ax.text(x_max, y_value * 0.965, label) - - def print_two_empty_lines(self): - print("-----------------------------------------------------------------------------------") - print("-----------------------------------------------------------------------------------") - print(" ") - - def save_obj(self, obj, name): - """Saves given object as a pickle file""" - if name[-4:] != ".pkl": - name += ".pkl" - with open(name, 'wb') as f: - pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) - - def load_obj(self, name): - """Loads a pickle file object""" - with open(name, 'rb') as f: - return pickle.load(f) - - def visualise_preexisting_results(self, save_image_path=None, data_path=None, colors=None, show_image=True, ax=None, - title=None, y_limits=None): - """Visualises saved data results and then optionally saves the image""" - if not data_path: preexisting_results = self.create_object_to_store_results() - else: preexisting_results = self.load_obj(data_path) - for ix, agent in enumerate(list(preexisting_results.keys())): - agent_rolling_score_results = [results[1] for results in preexisting_results[agent]] - if colors: color = colors[ix] - else: color = None - self.visualise_overall_agent_results(agent_rolling_score_results, agent, show_mean_and_std_range=True, - color=color, ax=ax, title=title, y_limits=y_limits) - if save_image_path: plt.savefig(save_image_path, bbox_inches="tight") - if show_image: plt.show() - - def 
visualise_set_of_preexisting_results(self, results_data_paths, save_image_path=None, show_image=True, plot_titles=None, - y_limits=[None,None]): - """Visualises a set of preexisting results on 1 plot by making subplots""" - assert isinstance(results_data_paths, list), "all_results must be a list of data paths" - - num_figures = len(results_data_paths) - col_width = 15 - row_height = 6 - - if num_figures <= 2: - fig, axes = plt.subplots(1, num_figures, figsize=(col_width, row_height )) - elif num_figures <= 4: - fig, axes = plt.subplots(2, num_figures, figsize=(row_height, col_width)) - else: - raise ValueError("Need to tell this method how to deal with more than 4 plots") - for ax_ix in range(len(results_data_paths)): - self.visualise_preexisting_results(show_image=False, data_path=results_data_paths[ax_ix], ax=axes[ax_ix], - title=plot_titles[ax_ix], y_limits=y_limits[ax_ix]) - fig.tight_layout() - fig.subplots_adjust(bottom=0.25) - - if save_image_path: plt.savefig(save_image_path) #, bbox_inches="tight") - if show_image: plt.show() - - # ax.imshow(z, aspect="auto") - - - - - - - - diff --git a/agents/__init__.py b/agents/__init__.py deleted file mode 100644 index c7df41cea8f9e72cc7e67b84e34436626f9b5875..0000000000000000000000000000000000000000 --- a/agents/__init__.py +++ /dev/null @@ -1 +0,0 @@ -import os, sys; sys.path.append(os.path.dirname(os.path.realpath(__file__))) \ No newline at end of file diff --git a/agents/__pycache__/Base_Agent.cpython-310.pyc b/agents/__pycache__/Base_Agent.cpython-310.pyc deleted file mode 100644 index a898a8a38a02bf46199849c1f6f3dfd08ab373bd..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/Base_Agent.cpython-310.pyc and /dev/null differ diff --git a/agents/__pycache__/Base_Agent.cpython-38.pyc b/agents/__pycache__/Base_Agent.cpython-38.pyc deleted file mode 100644 index 4d8ddf73e6d6144f852f5187bef49408d1c58e83..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/Base_Agent.cpython-38.pyc and /dev/null differ diff --git a/agents/__pycache__/Base_Agent.cpython-39.pyc b/agents/__pycache__/Base_Agent.cpython-39.pyc deleted file mode 100644 index a22ca47022133db818b197243c93ee896eda7740..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/Base_Agent.cpython-39.pyc and /dev/null differ diff --git a/agents/__pycache__/HER_Base.cpython-310.pyc b/agents/__pycache__/HER_Base.cpython-310.pyc deleted file mode 100644 index df50f4af285706e7f34bbd093c1e5832499eeaca..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/HER_Base.cpython-310.pyc and /dev/null differ diff --git a/agents/__pycache__/HER_Base.cpython-39.pyc b/agents/__pycache__/HER_Base.cpython-39.pyc deleted file mode 100644 index 26e4805586d5ee609dca8237c1ccceb076e73a51..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/HER_Base.cpython-39.pyc and /dev/null differ diff --git a/agents/__pycache__/Trainer.cpython-310.pyc b/agents/__pycache__/Trainer.cpython-310.pyc deleted file mode 100644 index c3deb25119ccd90ddffb77173dd5c4f43f026b02..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/Trainer.cpython-310.pyc and /dev/null differ diff --git a/agents/__pycache__/Trainer.cpython-39.pyc b/agents/__pycache__/Trainer.cpython-39.pyc deleted file mode 100644 index 60a56d5f33bc5ee88a3642bc9b2afdaa079cfa3a..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/Trainer.cpython-39.pyc and /dev/null differ diff --git 
a/agents/__pycache__/__init__.cpython-310.pyc b/agents/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index b2188918c014a02be37e6afedccb7f9e8a2e0da8..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/agents/__pycache__/__init__.cpython-38.pyc b/agents/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index d9915ce21a539284c94d5b6bc8ab24663ed4bae9..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/agents/__pycache__/__init__.cpython-39.pyc b/agents/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index c99960406ec57b5ac1f5fa1af4062ca3c081ef57..0000000000000000000000000000000000000000 Binary files a/agents/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/agents/actor_critic_agents/A2C.py b/agents/actor_critic_agents/A2C.py deleted file mode 100644 index 24f1515a39f3993d3a9b11665fc00d97a3f2a712..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/A2C.py +++ /dev/null @@ -1,25 +0,0 @@ -from agents.actor_critic_agents.A3C import A3C - -class A2C(A3C): - """Synchronous version of A2C algorithm from deepmind paper https://arxiv.org/pdf/1602.01783.pdf. The only - difference between this and the A3C is that gradient updates get done in a batch rather than 1 by 1 as the gradients - come in""" - agent_name = "A2C" - def __init__(self, config): - super(A2C, self).__init__(config) - - def update_shared_model(self, gradient_updates_queue): - """Worker that updates the shared model with gradients as they get put into the queue""" - while True: - gradients_seen = 0 - while gradients_seen < self.worker_processes: - if gradients_seen == 0: - gradients = gradient_updates_queue.get() - else: - new_grads = gradient_updates_queue.get() - gradients = [grad + new_grad for grad, new_grad in zip(gradients, new_grads)] - gradients_seen += 1 - self.actor_critic_optimizer.zero_grad() - for grads, params in zip(gradients, self.actor_critic.parameters()): - params._grad = grads - self.actor_critic_optimizer.step() \ No newline at end of file diff --git a/agents/actor_critic_agents/A3C.py b/agents/actor_critic_agents/A3C.py deleted file mode 100644 index 966b6cf34fde7701657214c354b463760f3ebc45..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/A3C.py +++ /dev/null @@ -1,229 +0,0 @@ -import copy -import random -import time -import numpy as np -import torch -from torch import multiprocessing -from torch.multiprocessing import Queue -from torch.optim import Adam -from agents.Base_Agent import Base_Agent -from utilities.Utility_Functions import create_actor_distribution, SharedAdam - -class A3C(Base_Agent): - """Actor critic A3C algorithm from deepmind paper https://arxiv.org/pdf/1602.01783.pdf""" - agent_name = "A3C" - def __init__(self, config): - super(A3C, self).__init__(config) - self.num_processes = multiprocessing.cpu_count() - self.worker_processes = max(1, self.num_processes - 2) - self.actor_critic = self.create_NN(input_dim=self.state_size, output_dim=[self.action_size, 1]) - self.actor_critic_optimizer = SharedAdam(self.actor_critic.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4) - - def run_n_episodes(self): - """Runs game to completion n times and then summarises results and saves model (if asked to)""" - start = time.time() - results_queue = Queue() - gradient_updates_queue = Queue() - episode_number = 
multiprocessing.Value('i', 0) - self.optimizer_lock = multiprocessing.Lock() - episodes_per_process = int(self.config.num_episodes_to_run / self.worker_processes) + 1 - processes = [] - self.actor_critic.share_memory() - self.actor_critic_optimizer.share_memory() - - optimizer_worker = multiprocessing.Process(target=self.update_shared_model, args=(gradient_updates_queue,)) - optimizer_worker.start() - - for process_num in range(self.worker_processes): - worker = Actor_Critic_Worker(process_num, copy.deepcopy(self.environment), self.actor_critic, episode_number, self.optimizer_lock, - self.actor_critic_optimizer, self.config, episodes_per_process, - self.hyperparameters["epsilon_decay_rate_denominator"], - self.action_size, self.action_types, - results_queue, copy.deepcopy(self.actor_critic), gradient_updates_queue) - worker.start() - processes.append(worker) - self.print_results(episode_number, results_queue) - for worker in processes: - worker.join() - optimizer_worker.kill() - - time_taken = time.time() - start - return self.game_full_episode_scores, self.rolling_results, time_taken - - def print_results(self, episode_number, results_queue): - """Worker that prints out results as they get put into a queue""" - while True: - with episode_number.get_lock(): - carry_on = episode_number.value < self.config.num_episodes_to_run - if carry_on: - if not results_queue.empty(): - self.total_episode_score_so_far = results_queue.get() - self.save_and_print_result() - else: break - - def update_shared_model(self, gradient_updates_queue): - """Worker that updates the shared model with gradients as they get put into the queue""" - while True: - gradients = gradient_updates_queue.get() - with self.optimizer_lock: - self.actor_critic_optimizer.zero_grad() - for grads, params in zip(gradients, self.actor_critic.parameters()): - params._grad = grads # maybe need to do grads.clone() - self.actor_critic_optimizer.step() - -class Actor_Critic_Worker(torch.multiprocessing.Process): - """Actor critic worker that will play the game for the designated number of episodes """ - def __init__(self, worker_num, environment, shared_model, counter, optimizer_lock, shared_optimizer, - config, episodes_to_run, epsilon_decay_denominator, action_size, action_types, results_queue, - local_model, gradient_updates_queue): - super(Actor_Critic_Worker, self).__init__() - self.environment = environment - self.config = config - self.worker_num = worker_num - - self.gradient_clipping_norm = self.config.hyperparameters["gradient_clipping_norm"] - self.discount_rate = self.config.hyperparameters["discount_rate"] - self.normalise_rewards = self.config.hyperparameters["normalise_rewards"] - - self.action_size = action_size - self.set_seeds(self.worker_num) - self.shared_model = shared_model - self.local_model = local_model - self.local_optimizer = Adam(self.local_model.parameters(), lr=0.0, eps=1e-4) - self.counter = counter - self.optimizer_lock = optimizer_lock - self.shared_optimizer = shared_optimizer - self.episodes_to_run = episodes_to_run - self.epsilon_decay_denominator = epsilon_decay_denominator - self.exploration_worker_difference = self.config.hyperparameters["exploration_worker_difference"] - self.action_types = action_types - self.results_queue = results_queue - self.episode_number = 0 - - self.gradient_updates_queue = gradient_updates_queue - - def set_seeds(self, worker_num): - """Sets random seeds for this worker""" - torch.manual_seed(self.config.seed + worker_num) - self.environment.seed(self.config.seed +
worker_num) - - def run(self): - """Starts the worker""" - torch.set_num_threads(1) - for ep_ix in range(self.episodes_to_run): - with self.optimizer_lock: - Base_Agent.copy_model_over(self.shared_model, self.local_model) - epsilon_exploration = self.calculate_new_exploration() - state = self.reset_game_for_worker() - done = False - self.episode_states = [] - self.episode_actions = [] - self.episode_rewards = [] - self.episode_log_action_probabilities = [] - self.critic_outputs = [] - - while not done: - action, action_log_prob, critic_outputs = self.pick_action_and_get_critic_values(self.local_model, state, epsilon_exploration) - next_state, reward, done, _ = self.environment.step(action) - self.episode_states.append(state) - self.episode_actions.append(action) - self.episode_rewards.append(reward) - self.episode_log_action_probabilities.append(action_log_prob) - self.critic_outputs.append(critic_outputs) - state = next_state - - total_loss = self.calculate_total_loss() - self.put_gradients_in_queue(total_loss) - self.episode_number += 1 - with self.counter.get_lock(): - self.counter.value += 1 - self.results_queue.put(np.sum(self.episode_rewards)) - - def calculate_new_exploration(self): - """Calculates the new exploration parameter epsilon. It picks a random point within 3X above and below the - current epsilon""" - with self.counter.get_lock(): - epsilon = 1.0 / (1.0 + (self.counter.value / self.epsilon_decay_denominator)) - epsilon = max(0.0, random.uniform(epsilon / self.exploration_worker_difference, epsilon * self.exploration_worker_difference)) - return epsilon - - def reset_game_for_worker(self): - """Resets the game environment so it is ready to play a new episode""" - state = self.environment.reset() - if self.action_types == "CONTINUOUS": self.noise.reset() - return state - - def pick_action_and_get_critic_values(self, policy, state, epsilon_exploration=None): - """Picks an action using the policy""" - state = torch.from_numpy(state).float().unsqueeze(0) - model_output = policy.forward(state) - actor_output = model_output[:, list(range(self.action_size))] #we only use first set of columns to decide action, last column is state-value - critic_output = model_output[:, -1] - action_distribution = create_actor_distribution(self.action_types, actor_output, self.action_size) - action = action_distribution.sample().cpu().numpy() - if self.action_types == "CONTINUOUS": action += self.noise.sample() - if self.action_types == "DISCRETE": - if random.random() <= epsilon_exploration: - action = random.randint(0, self.action_size - 1) - else: - action = action[0] - action_log_prob = self.calculate_log_action_probability(action, action_distribution) - return action, action_log_prob, critic_output - - def calculate_log_action_probability(self, actions, action_distribution): - """Calculates the log probability of the chosen action""" - policy_distribution_log_prob = action_distribution.log_prob(torch.Tensor([actions])) - return policy_distribution_log_prob - - def calculate_total_loss(self): - """Calculates the actor loss + critic loss""" - discounted_returns = self.calculate_discounted_returns() - if self.normalise_rewards: - discounted_returns = self.normalise_discounted_returns(discounted_returns) - critic_loss, advantages = self.calculate_critic_loss_and_advantages(discounted_returns) - actor_loss = self.calculate_actor_loss(advantages) - total_loss = actor_loss + critic_loss - return total_loss - - def calculate_discounted_returns(self): - """Calculates the cumulative discounted return 
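# Editor's note: a small worked example (assumed hyperparameter values) of the exploration
# schedule implemented by calculate_new_exploration above: a base epsilon that decays with the
# shared episode counter, then a value sampled within a factor of exploration_worker_difference
# above and below it so that different workers explore at different rates.
import random

def new_exploration(episodes_so_far, epsilon_decay_denominator=150.0,
                    exploration_worker_difference=3.0):
    epsilon = 1.0 / (1.0 + episodes_so_far / epsilon_decay_denominator)
    return max(0.0, random.uniform(epsilon / exploration_worker_difference,
                                   epsilon * exploration_worker_difference))

# After 300 episodes the base epsilon is 1 / (1 + 300/150) = 1/3, so the sampled value
# falls somewhere in [1/9, 1.0].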
for an episode which we will then use in a learning iteration""" - discounted_returns = [0] - for ix in range(len(self.episode_states)): - return_value = self.episode_rewards[-(ix + 1)] + self.discount_rate*discounted_returns[-1] - discounted_returns.append(return_value) - discounted_returns = discounted_returns[1:] - discounted_returns = discounted_returns[::-1] - return discounted_returns - - def normalise_discounted_returns(self, discounted_returns): - """Normalises the discounted returns by dividing by mean and std of returns that episode""" - mean = np.mean(discounted_returns) - std = np.std(discounted_returns) - discounted_returns -= mean - discounted_returns /= (std + 1e-5) - return discounted_returns - - def calculate_critic_loss_and_advantages(self, all_discounted_returns): - """Calculates the critic's loss and the advantages""" - critic_values = torch.cat(self.critic_outputs) - advantages = torch.Tensor(all_discounted_returns) - critic_values - advantages = advantages.detach() - critic_loss = (torch.Tensor(all_discounted_returns) - critic_values)**2 - critic_loss = critic_loss.mean() - return critic_loss, advantages - - def calculate_actor_loss(self, advantages): - """Calculates the loss for the actor""" - action_log_probabilities_for_all_episodes = torch.cat(self.episode_log_action_probabilities) - actor_loss = -1.0 * action_log_probabilities_for_all_episodes * advantages - actor_loss = actor_loss.mean() - return actor_loss - - def put_gradients_in_queue(self, total_loss): - """Puts gradients in a queue for the optimisation process to use to update the shared model""" - self.local_optimizer.zero_grad() - total_loss.backward() - torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), self.gradient_clipping_norm) - gradients = [param.grad.clone() for param in self.local_model.parameters()] - self.gradient_updates_queue.put(gradients) - diff --git a/agents/actor_critic_agents/DDPG.py b/agents/actor_critic_agents/DDPG.py deleted file mode 100644 index 8c3e5fde434f2903efe0432e86dd035e9a8db5e8..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/DDPG.py +++ /dev/null @@ -1,115 +0,0 @@ -import torch -import torch.nn.functional as functional -from torch import optim -from agents.Base_Agent import Base_Agent -from utilities.data_structures.Replay_Buffer import Replay_Buffer -from exploration_strategies.OU_Noise_Exploration import OU_Noise_Exploration - -class DDPG(Base_Agent): - """A DDPG Agent""" - agent_name = "DDPG" - - def __init__(self, config): - Base_Agent.__init__(self, config) - self.hyperparameters = config.hyperparameters - self.critic_local = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, key_to_use="Critic") - self.critic_target = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, key_to_use="Critic") - Base_Agent.copy_model_over(self.critic_local, self.critic_target) - - self.critic_optimizer = optim.Adam(self.critic_local.parameters(), - lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4) - self.memory = Replay_Buffer(self.hyperparameters["Critic"]["buffer_size"], self.hyperparameters["batch_size"], - self.config.seed) - self.actor_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Actor") - self.actor_target = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Actor") - Base_Agent.copy_model_over(self.actor_local, self.actor_target) - - self.actor_optimizer = optim.Adam(self.actor_local.parameters(), - 
lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4) - self.exploration_strategy = OU_Noise_Exploration(self.config) - - def step(self): - """Runs a step in the game""" - while not self.done: - # print("State ", self.state.shape) - self.action = self.pick_action() - self.conduct_action(self.action) - if self.time_for_critic_and_actor_to_learn(): - for _ in range(self.hyperparameters["learning_updates_per_learning_session"]): - states, actions, rewards, next_states, dones = self.sample_experiences() - self.critic_learn(states, actions, rewards, next_states, dones) - self.actor_learn(states) - self.save_experience() - self.state = self.next_state #this is to set the state for the next iteration - self.global_step_number += 1 - self.episode_number += 1 - - def sample_experiences(self): - return self.memory.sample() - - def pick_action(self, state=None): - """Picks an action using the actor network and then adds some noise to it to ensure exploration""" - if state is None: state = torch.from_numpy(self.state).float().unsqueeze(0).to(self.device) - self.actor_local.eval() - with torch.no_grad(): - action = self.actor_local(state).cpu().data.numpy() - self.actor_local.train() - action = self.exploration_strategy.perturb_action_for_exploration_purposes({"action": action}) - return action.squeeze(0) - - def critic_learn(self, states, actions, rewards, next_states, dones): - """Runs a learning iteration for the critic""" - loss = self.compute_loss(states, next_states, rewards, actions, dones) - self.take_optimisation_step(self.critic_optimizer, self.critic_local, loss, self.hyperparameters["Critic"]["gradient_clipping_norm"]) - self.soft_update_of_target_network(self.critic_local, self.critic_target, self.hyperparameters["Critic"]["tau"]) - - def compute_loss(self, states, next_states, rewards, actions, dones): - """Computes the loss for the critic""" - with torch.no_grad(): - critic_targets = self.compute_critic_targets(next_states, rewards, dones) - critic_expected = self.compute_expected_critic_values(states, actions) - loss = functional.mse_loss(critic_expected, critic_targets) - return loss - - def compute_critic_targets(self, next_states, rewards, dones): - """Computes the critic target values to be used in the loss for the critic""" - critic_targets_next = self.compute_critic_values_for_next_states(next_states) - critic_targets = self.compute_critic_values_for_current_states(rewards, critic_targets_next, dones) - return critic_targets - - def compute_critic_values_for_next_states(self, next_states): - """Computes the critic values for next states to be used in the loss for the critic""" - with torch.no_grad(): - actions_next = self.actor_target(next_states) - critic_targets_next = self.critic_target(torch.cat((next_states, actions_next), 1)) - return critic_targets_next - - def compute_critic_values_for_current_states(self, rewards, critic_targets_next, dones): - """Computes the critic values for current states to be used in the loss for the critic""" - critic_targets_current = rewards + (self.hyperparameters["discount_rate"] * critic_targets_next * (1.0 - dones)) - return critic_targets_current - - def compute_expected_critic_values(self, states, actions): - """Computes the expected critic values to be used in the loss for the critic""" - critic_expected = self.critic_local(torch.cat((states, actions), 1)) - return critic_expected - - def time_for_critic_and_actor_to_learn(self): - """Returns boolean indicating whether there are enough experiences to learn from and it is time to 
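# Editor's note: a self-contained sketch, with made-up shapes and values, of the target that
# compute_critic_values_for_current_states above builds for the DDPG critic:
# y = r + discount_rate * Q_target(s', actor_target(s')) * (1 - done).
import torch

def ddpg_critic_targets(rewards, q_next, dones, discount_rate=0.99):
    # rewards, q_next, dones: (batch_size, 1) tensors; dones holds 0.0 / 1.0 flags
    return rewards + discount_rate * q_next * (1.0 - dones)

rewards = torch.tensor([[1.0], [0.0]])
q_next = torch.tensor([[10.0], [5.0]])
dones = torch.tensor([[0.0], [1.0]])
print(ddpg_critic_targets(rewards, q_next, dones))   # targets of 10.9 and 0.0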
learn for the - actor and critic""" - return self.enough_experiences_to_learn_from() and self.global_step_number % self.hyperparameters["update_every_n_steps"] == 0 - - def actor_learn(self, states): - """Runs a learning iteration for the actor""" - if self.done: #we only update the learning rate at end of each episode - self.update_learning_rate(self.hyperparameters["Actor"]["learning_rate"], self.actor_optimizer) - actor_loss = self.calculate_actor_loss(states) - self.take_optimisation_step(self.actor_optimizer, self.actor_local, actor_loss, - self.hyperparameters["Actor"]["gradient_clipping_norm"]) - self.soft_update_of_target_network(self.actor_local, self.actor_target, self.hyperparameters["Actor"]["tau"]) - - def calculate_actor_loss(self, states): - """Calculates the loss for the actor""" - actions_pred = self.actor_local(states) - actor_loss = -self.critic_local(torch.cat((states, actions_pred), 1)).mean() - return actor_loss \ No newline at end of file diff --git a/agents/actor_critic_agents/DDPG_HER.py b/agents/actor_critic_agents/DDPG_HER.py deleted file mode 100644 index cff29b220f7d6ceb3182876e5db9dea841b4b4f3..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/DDPG_HER.py +++ /dev/null @@ -1,38 +0,0 @@ -from agents.actor_critic_agents.DDPG import DDPG -from agents.HER_Base import HER_Base - -class DDPG_HER(HER_Base, DDPG): - """DDPG algorithm with hindsight experience replay""" - agent_name = "DDPG-HER" - - def __init__(self, config): - DDPG.__init__(self, config) - HER_Base.__init__(self, self.hyperparameters["Critic"]["buffer_size"], self.hyperparameters["batch_size"], - self.hyperparameters["HER_sample_proportion"]) - - def step(self): - """Runs a step within a game including a learning step if required""" - while not self.done: - self.action = self.pick_action() - self.conduct_action_in_changeable_goal_envs(self.action) - if self.time_for_critic_and_actor_to_learn(): - for _ in range(self.hyperparameters["learning_updates_per_learning_session"]): - states, actions, rewards, next_states, dones = self.sample_from_HER_and_Ordinary_Buffer() # Samples experiences from buffer - self.critic_learn(states, actions, rewards, next_states, dones) - self.actor_learn(states) - self.track_changeable_goal_episodes_data() - self.save_experience() - if self.done: self.save_alternative_experience() - self.state_dict = self.next_state_dict # this is to set the state for the next iteration - self.state = self.next_state - self.global_step_number += 1 - self.episode_number += 1 - - def enough_experiences_to_learn_from(self): - """Returns boolean indicating whether there are enough experiences to learn from and it is time to learn""" - return len(self.memory) > self.ordinary_buffer_batch_size and len(self.HER_memory) > self.HER_buffer_batch_size - - - - - diff --git a/agents/actor_critic_agents/SAC.py b/agents/actor_critic_agents/SAC.py deleted file mode 100644 index b997fe97d9b83cc6e5153a27a8ba8715c8b5c940..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/SAC.py +++ /dev/null @@ -1,211 +0,0 @@ -from agents.Base_Agent import Base_Agent -from utilities.OU_Noise import OU_Noise -from utilities.data_structures.Replay_Buffer import Replay_Buffer -from torch.optim import Adam -import torch -import torch.nn.functional as F -from torch.distributions import Normal -import numpy as np - -LOG_SIG_MAX = 2 -LOG_SIG_MIN = -20 -TRAINING_EPISODES_PER_EVAL_EPISODE = 10 -EPSILON = 1e-6 - -class SAC(Base_Agent): - """Soft Actor-Critic model based on the 2018 paper 
https://arxiv.org/abs/1812.05905 and on this github implementation - https://github.com/pranz24/pytorch-soft-actor-critic. It is an actor-critic algorithm where the agent is also trained - to maximise the entropy of their actions as well as their cumulative reward""" - agent_name = "SAC" - def __init__(self, config): - Base_Agent.__init__(self, config) - assert self.action_types == "CONTINUOUS", "Action types must be continuous. Use SAC Discrete instead for discrete actions" - assert self.config.hyperparameters["Actor"]["final_layer_activation"] != "Softmax", "Final actor layer must not be softmax" - self.hyperparameters = config.hyperparameters - self.critic_local = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, key_to_use="Critic") - self.critic_local_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, - key_to_use="Critic", override_seed=self.config.seed + 1) - self.critic_optimizer = torch.optim.Adam(self.critic_local.parameters(), - lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4) - self.critic_optimizer_2 = torch.optim.Adam(self.critic_local_2.parameters(), - lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4) - self.critic_target = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, - key_to_use="Critic") - self.critic_target_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, - key_to_use="Critic") - Base_Agent.copy_model_over(self.critic_local, self.critic_target) - Base_Agent.copy_model_over(self.critic_local_2, self.critic_target_2) - self.memory = Replay_Buffer(self.hyperparameters["Critic"]["buffer_size"], self.hyperparameters["batch_size"], - self.config.seed) - self.actor_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size * 2, key_to_use="Actor") - self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(), - lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4) - self.automatic_entropy_tuning = self.hyperparameters["automatically_tune_entropy_hyperparameter"] - if self.automatic_entropy_tuning: - self.target_entropy = -torch.prod(torch.Tensor(self.environment.action_space.shape).to(self.device)).item() # heuristic value from the paper - self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device) - self.alpha = self.log_alpha.exp() - self.alpha_optim = Adam([self.log_alpha], lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4) - else: - self.alpha = self.hyperparameters["entropy_term_weight"] - - self.add_extra_noise = self.hyperparameters["add_extra_noise"] - if self.add_extra_noise: - self.noise = OU_Noise(self.action_size, self.config.seed, self.hyperparameters["mu"], - self.hyperparameters["theta"], self.hyperparameters["sigma"]) - - self.do_evaluation_iterations = self.hyperparameters["do_evaluation_iterations"] - - def save_result(self): - """Saves the result of an episode of the game. 
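# Editor's note: an illustrative, standalone sketch of the automatic entropy tuning that the
# SAC __init__ above sets up: alpha is parameterised through log_alpha and the target entropy
# for a continuous action space follows the -dim(A) heuristic. The learning rate and the
# example log-probabilities are assumptions.
import torch

action_dim = 2
target_entropy = -float(action_dim)
log_alpha = torch.zeros(1, requires_grad=True)
alpha_optim = torch.optim.Adam([log_alpha], lr=3e-4)

log_pi = torch.tensor([[-1.2], [-0.7]])                   # log-probs from the current policy
alpha_loss = -(log_alpha * (log_pi + target_entropy).detach()).mean()
alpha_optim.zero_grad(); alpha_loss.backward(); alpha_optim.step()
alpha = log_alpha.exp()                                   # temperature used in the SAC losses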
Overriding the method in Base Agent that does this because we only - want to keep track of the results during the evaluation episodes""" - if self.episode_number == 1 or not self.do_evaluation_iterations: - self.game_full_episode_scores.extend([self.total_episode_score_so_far]) - self.rolling_results.append(np.mean(self.game_full_episode_scores[-1 * self.rolling_score_window:])) - self.save_max_result_seen() - - elif (self.episode_number - 1) % TRAINING_EPISODES_PER_EVAL_EPISODE == 0: - self.game_full_episode_scores.extend([self.total_episode_score_so_far for _ in range(TRAINING_EPISODES_PER_EVAL_EPISODE)]) - self.rolling_results.extend([np.mean(self.game_full_episode_scores[-1 * self.rolling_score_window:]) for _ in range(TRAINING_EPISODES_PER_EVAL_EPISODE)]) - self.save_max_result_seen() - - def reset_game(self): - """Resets the game information so we are ready to play a new episode""" - Base_Agent.reset_game(self) - if self.add_extra_noise: self.noise.reset() - - def step(self): - """Runs an episode on the game, saving the experience and running a learning step if appropriate""" - eval_ep = self.episode_number % TRAINING_EPISODES_PER_EVAL_EPISODE == 0 and self.do_evaluation_iterations - self.episode_step_number_val = 0 - while not self.done: - self.episode_step_number_val += 1 - self.action = self.pick_action(eval_ep) - self.conduct_action(self.action) - if self.time_for_critic_and_actor_to_learn(): - for _ in range(self.hyperparameters["learning_updates_per_learning_session"]): - self.learn() - mask = False if self.episode_step_number_val >= self.environment._max_episode_steps else self.done - if not eval_ep: self.save_experience(experience=(self.state, self.action, self.reward, self.next_state, mask)) - self.state = self.next_state - self.global_step_number += 1 - print(self.total_episode_score_so_far) - if eval_ep: self.print_summary_of_latest_evaluation_episode() - self.episode_number += 1 - - def pick_action(self, eval_ep, state=None): - """Picks an action using one of three methods: 1) Randomly if we haven't passed a certain number of steps, - 2) Using the actor in evaluation mode if eval_ep is True 3) Using the actor in training mode if eval_ep is False. 
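# Editor's note: a tiny hedged illustration of the "mask" logic used in step() above: an episode
# that ends only because the environment's time limit was reached is stored as non-terminal, so
# the critic still bootstraps from the next state.
def termination_mask(done, episode_step, max_episode_steps):
    return False if episode_step >= max_episode_steps else done

print(termination_mask(done=True, episode_step=200, max_episode_steps=200))   # False (time limit)
print(termination_mask(done=True, episode_step=57, max_episode_steps=200))    # True (real terminal)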
- The difference between evaluation and training mode is that training mode does more exploration""" - if state is None: state = self.state - if eval_ep: action = self.actor_pick_action(state=state, eval=True) - elif self.global_step_number < self.hyperparameters["min_steps_before_learning"]: - action = self.environment.action_space.sample() - print("Picking random action ", action) - else: action = self.actor_pick_action(state=state) - if self.add_extra_noise: - action += self.noise.sample() - return action - - def actor_pick_action(self, state=None, eval=False): - """Uses actor to pick an action in one of two ways: 1) If eval = False and we aren't in eval mode then it picks - an action that has partly been randomly sampled 2) If eval = True then we pick the action that comes directly - from the network and so did not involve any random sampling""" - if state is None: state = self.state - state = torch.FloatTensor([state]).to(self.device) - if len(state.shape) == 1: state = state.unsqueeze(0) - if eval == False: action, _, _ = self.produce_action_and_action_info(state) - else: - with torch.no_grad(): - _, z, action = self.produce_action_and_action_info(state) - action = action.detach().cpu().numpy() - return action[0] - - def produce_action_and_action_info(self, state): - """Given the state, produces an action, the log probability of the action, and the tanh of the mean action""" - actor_output = self.actor_local(state) - mean, log_std = actor_output[:, :self.action_size], actor_output[:, self.action_size:] - std = log_std.exp() - normal = Normal(mean, std) - x_t = normal.rsample() #rsample means it is sampled using reparameterisation trick - action = torch.tanh(x_t) - log_prob = normal.log_prob(x_t) - log_prob -= torch.log(1 - action.pow(2) + EPSILON) - log_prob = log_prob.sum(1, keepdim=True) - return action, log_prob, torch.tanh(mean) - - def time_for_critic_and_actor_to_learn(self): - """Returns boolean indicating whether there are enough experiences to learn from and it is time to learn for the - actor and critic""" - return self.global_step_number > self.hyperparameters["min_steps_before_learning"] and \ - self.enough_experiences_to_learn_from() and self.global_step_number % self.hyperparameters["update_every_n_steps"] == 0 - - def learn(self): - """Runs a learning iteration for the actor, both critics and (if specified) the temperature parameter""" - state_batch, action_batch, reward_batch, next_state_batch, mask_batch = self.sample_experiences() - qf1_loss, qf2_loss = self.calculate_critic_losses(state_batch, action_batch, reward_batch, next_state_batch, mask_batch) - self.update_critic_parameters(qf1_loss, qf2_loss) - - policy_loss, log_pi = self.calculate_actor_loss(state_batch) - if self.automatic_entropy_tuning: alpha_loss = self.calculate_entropy_tuning_loss(log_pi) - else: alpha_loss = None - self.update_actor_parameters(policy_loss, alpha_loss) - - def sample_experiences(self): - return self.memory.sample() - - def calculate_critic_losses(self, state_batch, action_batch, reward_batch, next_state_batch, mask_batch): - """Calculates the losses for the two critics. 
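# Editor's note: a standalone sketch (assumed shapes) of the squashed-Gaussian sampling performed
# by produce_action_and_action_info above: reparameterised sample, tanh squash, and the
# change-of-variables correction to the log-probability.
import torch
from torch.distributions import Normal

EPSILON = 1e-6

def sample_squashed_gaussian(mean, log_std):
    std = log_std.exp()
    normal = Normal(mean, std)
    x_t = normal.rsample()                                # reparameterisation trick
    action = torch.tanh(x_t)                              # squash into (-1, 1)
    log_prob = normal.log_prob(x_t)
    log_prob -= torch.log(1 - action.pow(2) + EPSILON)    # tanh correction term
    return action, log_prob.sum(1, keepdim=True)

mean, log_std = torch.zeros(4, 2), torch.zeros(4, 2)
action, log_prob = sample_squashed_gaussian(mean, log_std)
print(action.shape, log_prob.shape)                       # (4, 2) and (4, 1)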
This is the ordinary Q-learning loss except the additional entropy - term is taken into account""" - with torch.no_grad(): - next_state_action, next_state_log_pi, _ = self.produce_action_and_action_info(next_state_batch) - qf1_next_target = self.critic_target(torch.cat((next_state_batch, next_state_action), 1)) - qf2_next_target = self.critic_target_2(torch.cat((next_state_batch, next_state_action), 1)) - min_qf_next_target = torch.min(qf1_next_target, qf2_next_target) - self.alpha * next_state_log_pi - next_q_value = reward_batch + (1.0 - mask_batch) * self.hyperparameters["discount_rate"] * (min_qf_next_target) - qf1 = self.critic_local(torch.cat((state_batch, action_batch), 1)) - qf2 = self.critic_local_2(torch.cat((state_batch, action_batch), 1)) - qf1_loss = F.mse_loss(qf1, next_q_value) - qf2_loss = F.mse_loss(qf2, next_q_value) - return qf1_loss, qf2_loss - - def calculate_actor_loss(self, state_batch): - """Calculates the loss for the actor. This loss includes the additional entropy term""" - action, log_pi, _ = self.produce_action_and_action_info(state_batch) - qf1_pi = self.critic_local(torch.cat((state_batch, action), 1)) - qf2_pi = self.critic_local_2(torch.cat((state_batch, action), 1)) - min_qf_pi = torch.min(qf1_pi, qf2_pi) - policy_loss = ((self.alpha * log_pi) - min_qf_pi).mean() - return policy_loss, log_pi - - def calculate_entropy_tuning_loss(self, log_pi): - """Calculates the loss for the entropy temperature parameter. This is only relevant if self.automatic_entropy_tuning - is True.""" - alpha_loss = -(self.log_alpha * (log_pi + self.target_entropy).detach()).mean() - return alpha_loss - - def update_critic_parameters(self, critic_loss_1, critic_loss_2): - """Updates the parameters for both critics""" - self.take_optimisation_step(self.critic_optimizer, self.critic_local, critic_loss_1, - self.hyperparameters["Critic"]["gradient_clipping_norm"]) - self.take_optimisation_step(self.critic_optimizer_2, self.critic_local_2, critic_loss_2, - self.hyperparameters["Critic"]["gradient_clipping_norm"]) - self.soft_update_of_target_network(self.critic_local, self.critic_target, - self.hyperparameters["Critic"]["tau"]) - self.soft_update_of_target_network(self.critic_local_2, self.critic_target_2, - self.hyperparameters["Critic"]["tau"]) - - def update_actor_parameters(self, actor_loss, alpha_loss): - """Updates the parameters for the actor and (if specified) the temperature parameter""" - self.take_optimisation_step(self.actor_optimizer, self.actor_local, actor_loss, - self.hyperparameters["Actor"]["gradient_clipping_norm"]) - if alpha_loss is not None: - self.take_optimisation_step(self.alpha_optim, None, alpha_loss, None) - self.alpha = self.log_alpha.exp() - - def print_summary_of_latest_evaluation_episode(self): - """Prints a summary of the latest episode""" - print(" ") - print("----------------------------") - print("Episode score {} ".format(self.total_episode_score_so_far)) - print("----------------------------") \ No newline at end of file diff --git a/agents/actor_critic_agents/SAC_Discrete.py b/agents/actor_critic_agents/SAC_Discrete.py deleted file mode 100644 index c4ebe0582133b21b2521b73ccb5ae2dbdeff6d78..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/SAC_Discrete.py +++ /dev/null @@ -1,94 +0,0 @@ -import torch -from torch.optim import Adam -import torch.nn.functional as F -import numpy as np -from agents.Base_Agent import Base_Agent -from utilities.data_structures.Replay_Buffer import Replay_Buffer -from agents.actor_critic_agents.SAC 
import SAC -from utilities.Utility_Functions import create_actor_distribution - -class SAC_Discrete(SAC): - """The Soft Actor Critic for discrete actions. It inherits from SAC for continuous actions and only changes a few - methods.""" - agent_name = "SAC" - def __init__(self, config): - Base_Agent.__init__(self, config) - assert self.action_types == "DISCRETE", "Action types must be discrete. Use SAC instead for continuous actions" - assert self.config.hyperparameters["Actor"]["final_layer_activation"] == "Softmax", "Final actor layer must be softmax" - self.hyperparameters = config.hyperparameters - self.critic_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Critic") - self.critic_local_2 = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, - key_to_use="Critic", override_seed=self.config.seed + 1) - self.critic_optimizer = torch.optim.Adam(self.critic_local.parameters(), - lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4) - self.critic_optimizer_2 = torch.optim.Adam(self.critic_local_2.parameters(), - lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4) - self.critic_target = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, - key_to_use="Critic") - self.critic_target_2 = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, - key_to_use="Critic") - Base_Agent.copy_model_over(self.critic_local, self.critic_target) - Base_Agent.copy_model_over(self.critic_local_2, self.critic_target_2) - self.memory = Replay_Buffer(self.hyperparameters["Critic"]["buffer_size"], self.hyperparameters["batch_size"], - self.config.seed, device=self.device) - - self.actor_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Actor") - self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(), - lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4) - self.automatic_entropy_tuning = self.hyperparameters["automatically_tune_entropy_hyperparameter"] - if self.automatic_entropy_tuning: - # we set the max possible entropy as the target entropy - self.target_entropy = -np.log((1.0 / self.action_size)) * 0.98 - self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device) - self.alpha = self.log_alpha.exp() - self.alpha_optim = Adam([self.log_alpha], lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4) - else: - self.alpha = self.hyperparameters["entropy_term_weight"] - assert not self.hyperparameters["add_extra_noise"], "There is no add extra noise option for the discrete version of SAC at moment" - self.add_extra_noise = False - self.do_evaluation_iterations = self.hyperparameters["do_evaluation_iterations"] - - def produce_action_and_action_info(self, state): - """Given the state, produces an action, the probability of the action, the log probability of the action, and - the argmax action""" - action_probabilities = self.actor_local(state) - max_probability_action = torch.argmax(action_probabilities, dim=-1) - action_distribution = create_actor_distribution(self.action_types, action_probabilities, self.action_size) - action = action_distribution.sample().cpu() - # Have to deal with situation of 0.0 probabilities because we can't do log 0 - z = action_probabilities == 0.0 - z = z.float() * 1e-8 - log_action_probabilities = torch.log(action_probabilities + z) - return action, (action_probabilities, log_action_probabilities), max_probability_action - - def calculate_critic_losses(self, state_batch, action_batch, 
reward_batch, next_state_batch, mask_batch): - """Calculates the losses for the two critics. This is the ordinary Q-learning loss except the additional entropy - term is taken into account""" - with torch.no_grad(): - next_state_action, (action_probabilities, log_action_probabilities), _ = self.produce_action_and_action_info(next_state_batch) - qf1_next_target = self.critic_target(next_state_batch) - qf2_next_target = self.critic_target_2(next_state_batch) - min_qf_next_target = action_probabilities * (torch.min(qf1_next_target, qf2_next_target) - self.alpha * log_action_probabilities) - min_qf_next_target = min_qf_next_target.sum(dim=1).unsqueeze(-1) - next_q_value = reward_batch + (1.0 - mask_batch) * self.hyperparameters["discount_rate"] * (min_qf_next_target) - - qf1 = self.critic_local(state_batch).gather(1, action_batch.long()) - qf2 = self.critic_local_2(state_batch).gather(1, action_batch.long()) - qf1_loss = F.mse_loss(qf1, next_q_value) - qf2_loss = F.mse_loss(qf2, next_q_value) - return qf1_loss, qf2_loss - - def calculate_actor_loss(self, state_batch): - """Calculates the loss for the actor. This loss includes the additional entropy term""" - action, (action_probabilities, log_action_probabilities), _ = self.produce_action_and_action_info(state_batch) - qf1_pi = self.critic_local(state_batch) - qf2_pi = self.critic_local_2(state_batch) - min_qf_pi = torch.min(qf1_pi, qf2_pi) - inside_term = self.alpha * log_action_probabilities - min_qf_pi - policy_loss = (action_probabilities * inside_term).sum(dim=1).mean() - log_action_probabilities = torch.sum(log_action_probabilities * action_probabilities, dim=1) - return policy_loss, log_action_probabilities - - def locally_save_policy(self): - """Saves the policy""" - torch.save(self.actor_local.state_dict(), "{}/{}_network.pt".format(self.config.models_dir, self.agent_name)) diff --git a/agents/actor_critic_agents/TD3.py b/agents/actor_critic_agents/TD3.py deleted file mode 100644 index 512482ea3451311f0b7e5b26c4a0c96f4d34af7b..0000000000000000000000000000000000000000 --- a/agents/actor_critic_agents/TD3.py +++ /dev/null @@ -1,54 +0,0 @@ -import torch -import torch.nn.functional as functional -from torch import optim -from agents.Base_Agent import Base_Agent -from .DDPG import DDPG -from exploration_strategies.Gaussian_Exploration import Gaussian_Exploration - -class TD3(DDPG): - """A TD3 Agent from the paper Addressing Function Approximation Error in Actor-Critic Methods (Fujimoto et al. 
2018) - https://arxiv.org/abs/1802.09477""" - agent_name = "TD3" - - def __init__(self, config): - DDPG.__init__(self, config) - self.critic_local_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, - key_to_use="Critic", override_seed=self.config.seed + 1) - self.critic_target_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, - key_to_use="Critic") - Base_Agent.copy_model_over(self.critic_local_2, self.critic_target_2) - self.critic_optimizer_2 = optim.Adam(self.critic_local_2.parameters(), - lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4) - self.exploration_strategy_critic = Gaussian_Exploration(self.config) - - def compute_critic_values_for_next_states(self, next_states): - """Computes the critic values for next states to be used in the loss for the critic""" - with torch.no_grad(): - actions_next = self.actor_target(next_states) - actions_next_with_noise = self.exploration_strategy_critic.perturb_action_for_exploration_purposes({"action": actions_next}) - critic_targets_next_1 = self.critic_target(torch.cat((next_states, actions_next_with_noise), 1)) - critic_targets_next_2 = self.critic_target_2(torch.cat((next_states, actions_next_with_noise), 1)) - critic_targets_next = torch.min(torch.cat((critic_targets_next_1, critic_targets_next_2),1), dim=1)[0].unsqueeze(-1) - return critic_targets_next - - def critic_learn(self, states, actions, rewards, next_states, dones): - """Runs a learning iteration for both the critics""" - critic_targets_next = self.compute_critic_values_for_next_states(next_states) - critic_targets = self.compute_critic_values_for_current_states(rewards, critic_targets_next, dones) - - critic_expected_1 = self.critic_local(torch.cat((states, actions), 1)) - critic_expected_2 = self.critic_local_2(torch.cat((states, actions), 1)) - - critic_loss_1 = functional.mse_loss(critic_expected_1, critic_targets) - critic_loss_2 = functional.mse_loss(critic_expected_2, critic_targets) - - self.take_optimisation_step(self.critic_optimizer, self.critic_local, critic_loss_1, self.hyperparameters["Critic"]["gradient_clipping_norm"]) - self.take_optimisation_step(self.critic_optimizer_2, self.critic_local_2, critic_loss_2, - self.hyperparameters["Critic"]["gradient_clipping_norm"]) - - self.soft_update_of_target_network(self.critic_local, self.critic_target, self.hyperparameters["Critic"]["tau"]) - self.soft_update_of_target_network(self.critic_local_2, self.critic_target_2, self.hyperparameters["Critic"]["tau"]) - - - - diff --git a/agents/actor_critic_agents/__pycache__/A2C.cpython-39.pyc b/agents/actor_critic_agents/__pycache__/A2C.cpython-39.pyc deleted file mode 100644 index 42c70fb0e8012f59da32c66d12fae4e2ffdacc89..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/A2C.cpython-39.pyc and /dev/null differ diff --git a/agents/actor_critic_agents/__pycache__/A3C.cpython-39.pyc b/agents/actor_critic_agents/__pycache__/A3C.cpython-39.pyc deleted file mode 100644 index 6c76a1fe0078a711343dcf99464168f32d6c2a66..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/A3C.cpython-39.pyc and /dev/null differ diff --git a/agents/actor_critic_agents/__pycache__/DDPG.cpython-39.pyc b/agents/actor_critic_agents/__pycache__/DDPG.cpython-39.pyc deleted file mode 100644 index e81d516174254b4e6932316cbe4e0f71c83e6f9d..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/DDPG.cpython-39.pyc and /dev/null 
differ diff --git a/agents/actor_critic_agents/__pycache__/SAC.cpython-310.pyc b/agents/actor_critic_agents/__pycache__/SAC.cpython-310.pyc deleted file mode 100644 index 8340c1af03d92941e6ba013f843ae544be1ec0a6..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/SAC.cpython-310.pyc and /dev/null differ diff --git a/agents/actor_critic_agents/__pycache__/SAC.cpython-39.pyc b/agents/actor_critic_agents/__pycache__/SAC.cpython-39.pyc deleted file mode 100644 index b64dc1d5730e63df4220d1f998be5e43b83905dc..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/SAC.cpython-39.pyc and /dev/null differ diff --git a/agents/actor_critic_agents/__pycache__/SAC_Discrete.cpython-310.pyc b/agents/actor_critic_agents/__pycache__/SAC_Discrete.cpython-310.pyc deleted file mode 100644 index 78dc28c5b412aa5e9e66b003218d7d73693e06f8..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/SAC_Discrete.cpython-310.pyc and /dev/null differ diff --git a/agents/actor_critic_agents/__pycache__/SAC_Discrete.cpython-39.pyc b/agents/actor_critic_agents/__pycache__/SAC_Discrete.cpython-39.pyc deleted file mode 100644 index 41b7446733875ddd2aed27dd7bbb134d284a070a..0000000000000000000000000000000000000000 Binary files a/agents/actor_critic_agents/__pycache__/SAC_Discrete.cpython-39.pyc and /dev/null differ diff --git a/agents/hierarchical_agents/DIAYN.py b/agents/hierarchical_agents/DIAYN.py deleted file mode 100644 index a44be6425433acedff285a2872b332b168ececca..0000000000000000000000000000000000000000 --- a/agents/hierarchical_agents/DIAYN.py +++ /dev/null @@ -1,128 +0,0 @@ -import torch -from gym import Wrapper, spaces -from torch import optim, nn -import numpy as np -import random -import time -import copy -import torch.nn.functional as F -from agents.Base_Agent import Base_Agent -from agents.DQN_agents.DDQN import DDQN -from agents.actor_critic_agents.SAC import SAC - -# NOTE: DIAYN calculates diversity of states penalty each timestep but it might be better to only base it on where the -# agent got to in the last timestep, or after X timesteps -# NOTE another problem with this is that the discriminator is trained from online data as it comes in which isn't iid -# so we could probably make it perform better by maintaining a replay buffer and using that to train the discriminator instead - -class DIAYN(Base_Agent): - """Hierarchical RL agent based on the paper Diversity is all you need (2018) - https://arxiv.org/pdf/1802.06070.pdf. 
- Works in two stages: - 1) First it trains an agent that tries to reach different states depending on which skill number is - inputted - 2) Then it trains an agent to maximise reward using its choice of skill for the lower level agent""" - agent_name = "DIAYN" - def __init__(self, config): - super().__init__(config) - self.training_mode = True - self.num_skills = config.hyperparameters["num_skills"] - self.unsupervised_episodes = config.hyperparameters["num_unsupservised_episodes"] - self.supervised_episodes = config.num_episodes_to_run - self.unsupervised_episodes - - assert self.hyperparameters["DISCRIMINATOR"]["final_layer_activation"] == None, "Final layer activation for disciminator should be None" - self.discriminator = self.create_NN(self.state_size, self.num_skills, key_to_use="DISCRIMINATOR") - self.discriminator_optimizer = optim.Adam(self.discriminator.parameters(), - lr=self.hyperparameters["DISCRIMINATOR"]["learning_rate"]) - self.agent_config = copy.deepcopy(config) - self.agent_config.environment = DIAYN_Skill_Wrapper(copy.deepcopy(self.environment), self.num_skills, self) - self.agent_config.hyperparameters = self.agent_config.hyperparameters["AGENT"] - self.agent_config.hyperparameters["do_evaluation_iterations"] = False - self.agent = SAC(self.agent_config) #We have to use SAC because it involves maximising the policy's entropy over actions which is also a part of DIAYN - - self.timesteps_to_give_up_control_for = self.hyperparameters["MANAGER"]["timesteps_to_give_up_control_for"] - self.manager_agent_config = copy.deepcopy(config) - self.manager_agent_config.environment = DIAYN_Manager_Agent_Wrapper(copy.deepcopy(self.environment), self.agent, - self.timesteps_to_give_up_control_for, self.num_skills) - self.manager_agent_config.hyperparameters = self.manager_agent_config.hyperparameters["MANAGER"] - self.manager_agent = DDQN(self.manager_agent_config) - - def run_n_episodes(self, num_episodes=None, show_whether_achieved_goal=True, save_and_print_results=True): - start = time.time() - self.agent.run_n_episodes(num_episodes=self.unsupervised_episodes, show_whether_achieved_goal=False) - game_full_episode_scores, rolling_results, _ = self.manager_agent.run_n_episodes(num_episodes=self.supervised_episodes) - time_taken = time.time() - start - pretraining_results = [np.min(self.agent.game_full_episode_scores)]*self.unsupervised_episodes - return pretraining_results + game_full_episode_scores, pretraining_results + rolling_results, time_taken - - def disciminator_learn(self, skill, discriminator_outputs): - if not self.training_mode: return - assert isinstance(skill, int) - assert discriminator_outputs.shape[0] == 1 - assert discriminator_outputs.shape[1] == self.num_skills - loss = nn.CrossEntropyLoss()(discriminator_outputs, torch.Tensor([skill]).long()) - self.take_optimisation_step(self.discriminator_optimizer, self.discriminator, loss, - self.hyperparameters["DISCRIMINATOR"]["gradient_clipping_norm"]) - - def get_predicted_probability_of_skill(self, skill, next_state): - """Gets the probability that the disciminator gives to the correct skill""" - predicted_probabilities_unnormalised = self.discriminator(torch.Tensor(next_state).unsqueeze(0)) - probability_of_correct_skill = F.softmax(predicted_probabilities_unnormalised)[:, skill] - return probability_of_correct_skill.item(), predicted_probabilities_unnormalised - -class DIAYN_Skill_Wrapper(Wrapper): - """Open AI gym wrapper to help create a pretraining environment in which to train diverse skills according to the - 
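# Editor's note: a hedged, self-contained sketch of the discriminator update performed by
# disciminator_learn above: the discriminator is trained with a plain cross-entropy loss to
# predict which skill produced the visited state. The network sizes and learning rate are
# placeholders.
import torch
from torch import nn

num_skills, state_dim = 5, 8
discriminator = nn.Sequential(nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, num_skills))
optimizer = torch.optim.Adam(discriminator.parameters(), lr=3e-4)

state = torch.randn(1, state_dim)                  # state reached while following a skill
skill = 3                                          # index of the skill that was active
logits = discriminator(state)                      # unnormalised skill scores
loss = nn.CrossEntropyLoss()(logits, torch.tensor([skill]))
optimizer.zero_grad(); loss.backward(); optimizer.step()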
specification in the Diversity is all you need (2018) paper """ - def __init__(self, env, num_skills, meta_agent): - Wrapper.__init__(self, env) - self.num_skills = num_skills - self.meta_agent = meta_agent - self.prior_probability_of_skill = 1.0 / self.num_skills #Each skill equally likely to be chosen - self._max_episode_steps = self.env._max_episode_steps - - def reset(self, **kwargs): - observation = self.env.reset(**kwargs) - self.skill = random.randint(0, self.num_skills - 1) - return self.observation(observation) - - def observation(self, observation): - return np.concatenate((np.array(observation), np.array([self.skill]))) - - def step(self, action): - next_state, _, done, _ = self.env.step(action) - new_reward, discriminator_outputs = self.calculate_new_reward(next_state) - self.meta_agent.disciminator_learn(self.skill, discriminator_outputs) - return self.observation(next_state), new_reward, done, _ - - def calculate_new_reward(self, next_state): - """Calculates an intrinsic reward that encourages maximum exploration. It also keeps track of the discriminator - outputs so they can be used for training""" - probability_correct_skill, disciminator_outputs = self.meta_agent.get_predicted_probability_of_skill(self.skill, next_state) - new_reward = np.log(probability_correct_skill + 1e-8) - np.log(self.prior_probability_of_skill) - return new_reward, disciminator_outputs - - -class DIAYN_Manager_Agent_Wrapper(Wrapper): - """Environment wrapper for the meta agent. The meta agent uses this environment to take in the state, decide on a skill - and then grant over control to the lower-level skill for a set number of timesteps""" - def __init__(self, env, lower_level_agent, timesteps_to_give_up_control_for, num_skills): - Wrapper.__init__(self, env) - self.action_space = spaces.Discrete(num_skills) - self.lower_level_agent = lower_level_agent - self.timesteps_to_give_up_control_for = timesteps_to_give_up_control_for - - def reset(self, **kwargs): - self.state = self.env.reset(**kwargs) - return self.state - - def step(self, skill_chosen): - """Runs a step in the game from the perspective of the manager agent. 
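# Editor's note: a worked numeric example (values invented) of the intrinsic reward computed by
# calculate_new_reward above: r = log q(skill | next_state) - log p(skill), with a uniform prior
# p(skill) = 1 / num_skills, so the agent is rewarded for reaching states the discriminator can
# attribute to its skill.
import numpy as np

def diayn_intrinsic_reward(prob_correct_skill, num_skills):
    prior = 1.0 / num_skills
    return np.log(prob_correct_skill + 1e-8) - np.log(prior)

# With 5 skills the prior is 0.2: an 80%-confident discriminator gives ~+1.386,
# a 10%-confident one gives ~-0.693.
print(diayn_intrinsic_reward(0.8, 5), diayn_intrinsic_reward(0.1, 5))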
This involves giving up control to the - lower-level agent for a set number of steps""" - cumulative_reward = 0 - for _ in range(self.timesteps_to_give_up_control_for): - combined_state = np.concatenate((np.array(self.state), np.array([skill_chosen]))) - action = self.lower_level_agent.pick_action(eval_ep=True, state=combined_state) - next_state, reward, done, _ = self.env.step(action) - cumulative_reward += reward - self.state = next_state - if done: break - return next_state, cumulative_reward, done, _ diff --git a/agents/hierarchical_agents/HIRO.py b/agents/hierarchical_agents/HIRO.py deleted file mode 100644 index 558a0f91adb8c0107c37ccd77e9465840c57363b..0000000000000000000000000000000000000000 --- a/agents/hierarchical_agents/HIRO.py +++ /dev/null @@ -1,267 +0,0 @@ -import copy -import torch -import numpy as np -from gym import Wrapper -from agents.Base_Agent import Base_Agent -from agents.actor_critic_agents.DDPG import DDPG -from agents.Trainer import Trainer - - -class HIRO(Base_Agent): - agent_name = "HIRO" - - def __init__(self, config): - super().__init__(config) - self.max_sub_policy_timesteps = config.hyperparameters["LOWER_LEVEL"]["max_lower_level_timesteps"] - self.config.hyperparameters = self.config.hyperparameters - - self.higher_level_state = None #true state of environment - self.higher_level_next_state = None - - self.higher_level_reward = None - self.lower_level_reward = None - - self.higher_level_done = False - self.lower_level_done = False - - self.goal = None - - self.lower_level_state = None #state of environment with goal appended - self.lower_level_next_state = None - - self.lower_level_agent_config = copy.deepcopy(config) - self.lower_level_agent_config.hyperparameters = self.lower_level_agent_config.hyperparameters["LOWER_LEVEL"] - - self.lower_level_agent_config.environment = Lower_Level_Agent_Environment_Wrapper(self.environment, self, self.max_sub_policy_timesteps) - self.lower_level_agent = DDPG(self.lower_level_agent_config) - - self.lower_level_agent.average_score_required_to_win = float("inf") - - self.higher_level_agent_config = copy.deepcopy(config) - self.higher_level_agent_config.hyperparameters = self.higher_level_agent_config.hyperparameters["HIGHER_LEVEL"] - self.higher_level_agent_config.environment = Higher_Level_Agent_Environment_Wrapper(self.environment, self) - self.higher_level_agent = HIRO_Higher_Level_DDPG_Agent(self.higher_level_agent_config, self.lower_level_agent.actor_local) - - self.step_lower_level_states = [] - self.step_lower_level_action_seen = [] - - - def run_n_episodes(self): - """Runs game to completion n times and then summarises results and saves model (if asked to)""" - self.higher_level_agent.run_n_episodes(self.config.num_episodes_to_run) - - @staticmethod - def goal_transition(state, goal, next_state): - """Provides updated goal according to the goal transition function in the HIRO paper""" - return state + goal - next_state - - def save_higher_level_experience(self): - self.higher_level_agent.step_lower_level_states = self.step_lower_level_states - self.higher_level_agent.step_lower_level_action_seen = self.step_lower_level_action_seen - -class HIRO_Higher_Level_DDPG_Agent(DDPG): - """Extends DDPG so that it can function as the higher level agent in the HIRO hierarchical RL algorithm. 
This only involves - changing how the agent saves experiences and samples them for learning""" - - def __init__(self, config, lower_level_policy): - super(HIRO_Higher_Level_DDPG_Agent, self).__init__(config) - self.lower_level_policy = lower_level_policy - self.number_goal_candidates = config.hyperparameters["number_goal_candidates"] - - def save_experience(self, memory=None, experience=None): - """Saves the recent experience to the memory buffer. Adapted from normal DDPG so that it saves the sequence of - states, goals and actions that we saw whilst control was given to the lower level""" - if memory is None: memory = self.memory - if experience is None: experience = self.step_lower_level_states, self.step_lower_level_action_seen, self.reward, self.next_state, self.done - memory.add_experience(*experience) - - def sample_experiences(self): - experiences = self.memory.produce_action_and_action_info(separate_out_data_types=False) - assert len(experiences[0].state) == self.hyperparameters["max_lower_level_timesteps"] or experiences[0].done - assert experiences[0].state[0].shape[0] == self.state_size * 2 - assert len(experiences[0].action) == self.hyperparameters["max_lower_level_timesteps"] or experiences[0].done - - states = [] - actions = [] - rewards = [] - next_states = [] - dones = [] - - for ix, experience in enumerate(experiences): - state, action, reward, next_state, done = self.transform_goal_to_one_most_likely_to_have_induced_actions(experience) - states.append(state) - actions.append(action) - rewards.append(reward) - next_states.append(next_state) - dones.append(done) - - states = torch.from_numpy(np.vstack([state for state in states])).float().to(self.device) - actions = torch.from_numpy(np.vstack([action for action in actions])).float().to(self.device) - rewards = torch.from_numpy(np.vstack([reward for reward in rewards])).float().to(self.device) - next_states = torch.from_numpy(np.vstack([next_state for next_state in next_states])).float().to(self.device) - dones = torch.from_numpy(np.vstack([int(done) for done in dones])).float().to(self.device) - - return states, actions, rewards, next_states, dones - - def transform_goal_to_one_most_likely_to_have_induced_actions(self, experience): - """Transforms the goal in an experience to the goal that would have been most likely to induce the actions chosen - by the lower level agent in the experience""" - goal_candidate_state_change = [experience.state[-1][:self.state_size] - experience.state[0][:self.state_size]] - goal_candidate_actual_goal = [experience.state[0][self.state_size:]] - goal_candidate_state_change_random_iterations = [np.random.normal(goal_candidate_state_change[0]) for _ in range(self.number_goal_candidates - 2)] - goal_candidates = goal_candidate_state_change + goal_candidate_actual_goal + goal_candidate_state_change_random_iterations - - max = float("-inf") - timesteps_in_experience = len(experience.state) - - for goal_ix, goal in enumerate(goal_candidates): - log_probability_total = 0 - for state_ix in range(timesteps_in_experience): - state_obs = experience.state[state_ix][:self.state_size] - action = experience.action[state_ix] - log_probability= self.log_probability_lower_level_picks_action(state_obs, goal, action) - log_probability_total += log_probability - if state_ix != timesteps_in_experience - 1: - next_state = experience.state[state_ix+1][:self.state_size] - goal = HIRO.goal_transition(state_obs, goal, next_state) - if log_probability_total >= max: - max = log_probability_total - best_goal_ix = goal_ix - 
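        # Off-policy goal relabelling (HIRO): the transition is re-assembled with the candidate goal
        # that maximises the summed log-probability of the lower-level actions actually observed,
        # so stored higher-level experience stays consistent with the current lower-level policy.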
- state = experience.state[0][:self.state_size] - next_state = experience.next_state - reward = experience.reward - action = goal_candidates[best_goal_ix] - done = experience.done - - assert next_state.shape[0] == self.state_size - - return state, action, reward, next_state, done - - - def log_probability_lower_level_picks_action(self, state, goal, action): - """Calculates the log probability that the lower level agent would have chosen this action given the state - and goal as inputs""" - state_and_goal = torch.from_numpy(np.concatenate((state, goal))).float().unsqueeze(0).to(self.device) - action_would_have_taken = self.lower_level_policy(state_and_goal).detach() - return -0.5 * torch.norm(action - action_would_have_taken, 2)**2 - - -class Higher_Level_Agent_Environment_Wrapper(Wrapper): - """Adapts the game environment so that it is compatible with the higher level agent which sets goals for the lower - level agent""" - def __init__(self, env, HIRO_agent): - Wrapper.__init__(self, env) - self.env = env - self.HIRO_agent = HIRO_agent - self.action_space = self.observation_space - - - def reset(self, **kwargs): - self.HIRO_agent.higher_level_state = self.env.reset(**kwargs) - return self.HIRO_agent.higher_level_state - - def step(self, goal): - self.HIRO_agent.higher_level_reward = 0 - self.HIRO_agent.step_lower_level_states = [] - self.HIRO_agent.step_lower_level_action_seen = [] - - self.HIRO_agent.goal = goal - self.HIRO_agent.lower_level_agent.episode_number = 0 #must reset lower level agent to 0 episodes completed otherwise won't run more episodes - self.HIRO_agent.lower_level_agent.run_n_episodes(num_episodes=1, show_whether_achieved_goal=False, save_and_print_results=False) - - self.HIRO_agent.save_higher_level_experience() - - return self.HIRO_agent.higher_level_next_state, self.HIRO_agent.higher_level_reward, self.HIRO_agent.higher_level_done, {} - -class Lower_Level_Agent_Environment_Wrapper(Wrapper): - """Open AI gym wrapper to help create an environment where a goal from a higher-level agent is treated as part - of the environment state""" - def __init__(self, env, HIRO_agent, max_sub_policy_timesteps): - Wrapper.__init__(self, env) - self.env = env - self.meta_agent = HIRO_agent - self.max_sub_policy_timesteps = max_sub_policy_timesteps - - self.track_intrinsic_rewards = [] - - def reset(self, **kwargs): - if self.meta_agent.higher_level_state is not None: state = self.meta_agent.higher_level_state - else: - print("INITIATION ONLY") - state = self.env.reset() - - if self.meta_agent.goal is not None: goal = self.meta_agent.goal - else: - print("INITIATION ONLY") - goal = state - - self.lower_level_timesteps = 0 - self.meta_agent.lower_level_done = False - - self.meta_agent.lower_level_state = self.turn_internal_state_to_external_state(state, goal) - - return self.meta_agent.lower_level_state - - def turn_internal_state_to_external_state(self, internal_state, goal): - return np.concatenate((np.array(internal_state), goal)) - - def step(self, action): - import random - if random.random() < 0.008: - print("Rolling intrinsic rewards {}".format(np.mean(self.track_intrinsic_rewards[-100:]))) - - - self.meta_agent.step_lower_level_states.append(self.meta_agent.lower_level_state) - self.meta_agent.step_lower_level_action_seen.append(action) - - self.lower_level_timesteps += 1 - next_state, extrinsic_reward, done, _ = self.env.step(action) - - - - self.update_rewards(extrinsic_reward, next_state) - self.update_goal(next_state) - self.update_state_and_next_state(next_state) - 
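        # Note: update_done also ends the lower-level episode once max_sub_policy_timesteps is
        # reached, even if the underlying environment episode has not terminated.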
self.update_done(done) - - return self.meta_agent.lower_level_next_state, self.meta_agent.lower_level_reward, self.meta_agent.lower_level_done, _ - - def update_rewards(self, extrinsic_reward, next_state): - self.meta_agent.higher_level_reward += extrinsic_reward - self.meta_agent.lower_level_reward = self.calculate_intrinsic_reward(self.meta_agent.higher_level_state, - next_state, - self.meta_agent.goal) - def update_goal(self, next_state): - - self.meta_agent.goal = HIRO.goal_transition(self.meta_agent.higher_level_state, self.meta_agent.goal, - next_state) - - def update_state_and_next_state(self, next_state): - self.meta_agent.higher_level_next_state = next_state - self.meta_agent.lower_level_next_state = self.turn_internal_state_to_external_state(next_state, - self.meta_agent.goal) - self.meta_agent.higher_level_state = self.meta_agent.higher_level_next_state - self.meta_agent.lower_level_state = self.meta_agent.lower_level_next_state - - def update_done(self, done): - self.meta_agent.higher_level_done = done - self.meta_agent.lower_level_done = done or self.lower_level_timesteps >= self.max_sub_policy_timesteps - - - def calculate_intrinsic_reward(self, internal_state, internal_next_state, goal): - """Calculates the intrinsic reward for the agent according to whether it has made progress towards the goal - or not since the last timestep""" - desired_next_state = internal_state + goal - error = desired_next_state - internal_next_state - intrinsic_reward = -(np.dot(error, error))**0.5 - - self.track_intrinsic_rewards.append(intrinsic_reward) - - return intrinsic_reward - - - - - - diff --git a/agents/hierarchical_agents/SNN_HRL.py b/agents/hierarchical_agents/SNN_HRL.py deleted file mode 100644 index 53a381967aa7609a353622a8185f9252b4c9c3ab..0000000000000000000000000000000000000000 --- a/agents/hierarchical_agents/SNN_HRL.py +++ /dev/null @@ -1,146 +0,0 @@ -import copy -import random -import time -import numpy as np -import torch -from gym import Wrapper, spaces -from agents.Base_Agent import Base_Agent -from agents.policy_gradient_agents.PPO import PPO -from agents.DQN_agents.DDQN import DDQN - - -class SNN_HRL(Base_Agent): - """Implements the hierarchical RL agent that uses stochastic neural networks (SNN) from the paper Florensa et al. 2017 - https://arxiv.org/pdf/1704.03012.pdf - Works by: - 1) Creating a pre-training environment within which the skill_agent can learn for some period of time - 2) Then skill_agent is frozen - 3) Then we train a manager agent that chooses which of the pre-trained skills to let act for it for some period of time - Note that it only works with discrete states at the moment. 
- - Note that this agent will not work well in environments where it is beneficial to end the game as quickly as possible - because then there isn't enough incentive for the skills to learn to explore different parts of the state space - """ - agent_name = "SNN-HRL" - - def __init__(self, config): - Base_Agent.__init__(self, config) - assert isinstance(self.environment.reset(), int) or isinstance(self.environment.reset(), np.int64) or self.environment.reset().dtype == np.int64, "only works for discrete states currently" - self.num_skills = self.hyperparameters["SKILL_AGENT"]["num_skills"] - self.episodes_for_pretraining = self.hyperparameters["SKILL_AGENT"]["episodes_for_pretraining"] - self.timesteps_before_changing_skill = self.hyperparameters["MANAGER"]["timesteps_before_changing_skill"] - - self.skill_agent_config = copy.deepcopy(config) - self.skill_agent_config.hyperparameters = self.skill_agent_config.hyperparameters["SKILL_AGENT"] - self.skill_agent_config.num_episodes_to_run = self.episodes_for_pretraining - - self.manager_config = copy.deepcopy(config) - self.manager_config.hyperparameters = self.manager_config.hyperparameters["MANAGER"] - self.manager_config.num_episodes_to_run = self.config.num_episodes_to_run - self.skill_agent_config.num_episodes_to_run - - def run_n_episodes(self): - """Runs game to completion n times and then summarises results and saves model (if asked to)""" - start = time.time() - - skill_agent = self.create_skill_training_agent() - skill_agent.run_n_episodes() - self.skill_agent_config.environment.print_state_distribution() - skill_agent.turn_off_any_epsilon_greedy_exploration() - - manager_agent = self.create_manager_agent(skill_agent) - manager_agent.run_n_episodes() - - time_taken = time.time() - start - pretraining_results = [np.min(manager_agent.game_full_episode_scores)]*self.episodes_for_pretraining - return pretraining_results + manager_agent.game_full_episode_scores, pretraining_results + manager_agent.rolling_results, time_taken - - def create_skill_training_agent(self): - """Creates and instantiates a pre-training environment for the agent to learn skills in and then instantiates - and agent to learn in this environment""" - self.skill_agent_config.environment = Skill_Wrapper(copy.deepcopy(self.environment), self.environment.observation_space.n, - self.num_skills, - self.skill_agent_config.hyperparameters[ - "regularisation_weight"], self.skill_agent_config.hyperparameters["visitations_decay"]) - return DDQN(self.skill_agent_config) - - def create_manager_agent(self, skill_agent): - """Instantiates a manager agent""" - self.manager_config.environment = Manager_Frozen_Worker_Wrapper(copy.deepcopy(self.environment), self.num_skills, - self.timesteps_before_changing_skill, skill_agent) - return DDQN(self.manager_config) - - -class Skill_Wrapper(Wrapper): - """Open AI gym wrapper to help create a pretraining environment in which to train skills""" - def __init__(self, env, num_states, num_skills, regularisation_weight, visitations_decay): - Wrapper.__init__(self, env) - self.num_skills = num_skills - self.num_states = num_states - self.state_visitations = [[0 for _ in range(num_states)] for _ in range(num_skills)] - self.regularisation_weight = regularisation_weight - self.visitations_decay = visitations_decay - - def reset(self, **kwargs): - observation = self.env.reset(**kwargs) - self.skill = random.randint(0, self.num_skills - 1) - return self.observation(observation) - - def observation(self, observation): - return 
np.concatenate((np.array(observation), np.array([self.skill]))) - - def step(self, action): - next_state, reward, done, _ = self.env.step(action) - new_reward = self.calculate_new_reward(reward, next_state) - return self.observation(next_state), new_reward, done, _ - - def calculate_new_reward(self, reward, next_state): - self.update_state_visitations(next_state) - probability_correct_skill = self.calculate_probability_correct_skill(next_state) - new_reward = reward + self.regularisation_weight * np.log(probability_correct_skill) - return new_reward - - def update_state_visitations(self, next_state): - """Updates table keeping track of number of times each state visited under each skill""" - self.state_visitations = [[val * self.visitations_decay for val in sublist] for sublist in - self.state_visitations] - self.state_visitations[self.skill][next_state[0]] += 1 - - def calculate_probability_correct_skill(self, next_state): - """Calculates the probability that being in a state implies a certain skill""" - visitations_correct_skill = self.state_visitations[self.skill][next_state[0]] - visitations_any_skill = np.sum([visit[next_state[0]] for visit in self.state_visitations]) - probability = float(visitations_correct_skill) / float(visitations_any_skill) - return probability - - def print_state_distribution(self): - """Prints the observed probability of skills depending on the state we are in""" - print(self.state_visitations) - state_count = {k: 0 for k in range(self.num_states)} - for skill in range(len(self.state_visitations)): - for state in range(len(self.state_visitations[0])): - state_count[state] += self.state_visitations[skill][state] - probability_visitations = [[row[ix] / max(1.0, state_count[ix]) for ix in range(len(row))] for row in - self.state_visitations] - print(" ") - print(probability_visitations) - print(" ") - -class Manager_Frozen_Worker_Wrapper(Wrapper): - """Open AI gym wrapper to help create an environment where manager learns to act by instructing a frozen worker""" - def __init__(self, env, num_skills, timesteps_before_changing_skill, skills_agent): - Wrapper.__init__(self, env) - self.action_space = spaces.Discrete(num_skills) - self.timesteps_before_changing_skill = timesteps_before_changing_skill - self.skills_agent = skills_agent - - def step(self, action): - """Moves a step in manager environment which involves committing to using a skill for a set number of timesteps""" - next_state = self.env.unwrapped.s - cumulative_reward = 0 - for _ in range(self.timesteps_before_changing_skill): - with torch.no_grad(): - skill_action = self.skills_agent.pick_action(np.array([next_state[0], action])) - next_state, reward, done, _ = self.env.step(skill_action) - cumulative_reward += reward - if done: break - return next_state, cumulative_reward, done, _ \ No newline at end of file diff --git a/agents/hierarchical_agents/__pycache__/DIAYN.cpython-39.pyc b/agents/hierarchical_agents/__pycache__/DIAYN.cpython-39.pyc deleted file mode 100644 index 01f99e8dbda8aadd750e93bb0167d6ca24664333..0000000000000000000000000000000000000000 Binary files a/agents/hierarchical_agents/__pycache__/DIAYN.cpython-39.pyc and /dev/null differ diff --git a/agents/hierarchical_agents/__pycache__/HIRO.cpython-39.pyc b/agents/hierarchical_agents/__pycache__/HIRO.cpython-39.pyc deleted file mode 100644 index caf6b1fb8d3b80b0dbfd24cb30fcaabbb09ab580..0000000000000000000000000000000000000000 Binary files a/agents/hierarchical_agents/__pycache__/HIRO.cpython-39.pyc and /dev/null differ diff --git 
a/agents/hierarchical_agents/__pycache__/SNN_HRL.cpython-39.pyc b/agents/hierarchical_agents/__pycache__/SNN_HRL.cpython-39.pyc deleted file mode 100644 index d91419a055d4dbcd68d35245cac311ee8c574d4f..0000000000000000000000000000000000000000 Binary files a/agents/hierarchical_agents/__pycache__/SNN_HRL.cpython-39.pyc and /dev/null differ diff --git a/agents/hierarchical_agents/__pycache__/h_DQN.cpython-39.pyc b/agents/hierarchical_agents/__pycache__/h_DQN.cpython-39.pyc deleted file mode 100644 index 04a1597a4fdcdbc378d5f69697383af8be08d32c..0000000000000000000000000000000000000000 Binary files a/agents/hierarchical_agents/__pycache__/h_DQN.cpython-39.pyc and /dev/null differ diff --git a/agents/hierarchical_agents/h_DQN.py b/agents/hierarchical_agents/h_DQN.py deleted file mode 100644 index a9fcc75cf6a4c22a634dc209128b0e2d35634dd8..0000000000000000000000000000000000000000 --- a/agents/hierarchical_agents/h_DQN.py +++ /dev/null @@ -1,121 +0,0 @@ -import copy -import numpy as np -from agents.Base_Agent import Base_Agent -from agents.DQN_agents.DDQN import DDQN - -class h_DQN(Base_Agent): - """Implements hierarchical RL agent h-DQN from paper Kulkarni et al. (2016) https://arxiv.org/abs/1604.06057?context=stat - Note also that this algorithm only works when we have discrete states and discrete actions currently because otherwise - it is not clear what it means to achieve a subgoal state designated by the meta-controller""" - agent_name = "h-DQN" - - def __init__(self, config): - Base_Agent.__init__(self, config) - self.controller_config = copy.deepcopy(config) - self.controller_config.hyperparameters = self.controller_config.hyperparameters["CONTROLLER"] - self.controller = DDQN(self.controller_config) - self.controller.q_network_local = self.create_NN(input_dim=self.state_size*2, output_dim=self.action_size, - key_to_use="CONTROLLER") - self.meta_controller_config = copy.deepcopy(config) - self.meta_controller_config.hyperparameters = self.meta_controller_config.hyperparameters["META_CONTROLLER"] - self.meta_controller = DDQN(self.meta_controller_config) - self.meta_controller.q_network_local = self.create_NN(input_dim=self.state_size, output_dim=self.state_size, - key_to_use="META_CONTROLLER") - self.rolling_intrinsic_rewards = [] - self.goals_seen = [] - self.controller_learnt_enough = False - self.controller_actions = [] - - def reset_game(self): - """Resets the game information so we are ready to play a new episode""" - self.state = self.environment.reset() - self.next_state = None - self.action = None - self.reward = None - self.done = False - self.cumulative_meta_controller_reward = 0 - self.episode_over = False - self.subgoal_achieved = False - self.total_episode_score_so_far = 0 - self.meta_controller_steps = 0 - self.update_learning_rate(self.controller_config.hyperparameters["learning_rate"], self.controller.q_network_optimizer) - self.update_learning_rate(self.meta_controller_config.hyperparameters["learning_rate"], self.meta_controller.q_network_optimizer) - - def step(self): - - self.episode_steps = 0 - - while not self.episode_over: - episode_intrinsic_rewards = [] - self.meta_controller_state = self.environment.state - self.subgoal = self.meta_controller.pick_action(state=self.meta_controller_state) - self.goals_seen.append(self.subgoal) - self.subgoal_achieved = False - self.state = np.concatenate((self.environment.state, np.array([self.subgoal]))) - self.cumulative_meta_controller_reward = 0 - - while not (self.episode_over or self.subgoal_achieved): - 
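                # Inner loop: the controller acts on (state, subgoal) until the subgoal is achieved
                # or the episode ends; only then does the meta-controller pick a new subgoal.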
self.pick_and_conduct_controller_action() - self.update_data() - if self.time_to_learn(self.controller.memory, self.global_step_number, "CONTROLLER"): #means it is time to train controller - for _ in range(self.hyperparameters["CONTROLLER"]["learning_iterations"]): - self.controller.learn() - self.save_experience(memory=self.controller.memory, experience=(self.state, self.action, self.reward, self.next_state, self.done)) - self.state = self.next_state #this is to set the state for the next iteration - self.global_step_number += 1 - episode_intrinsic_rewards.append(self.reward) - - if self.time_to_learn(self.meta_controller.memory, self.meta_controller_steps, "META_CONTROLLER"): - for _ in range(self.hyperparameters["META_CONTROLLER"]["learning_iterations"]): - self.meta_controller.learn() - - self.save_experience(memory=self.meta_controller.memory, - experience=(self.meta_controller_state, self.subgoal, self.cumulative_meta_controller_reward, - self.meta_controller_next_state, self.episode_over)) - self.meta_controller_steps += 1 - self.episode_steps += 1 - - self.rolling_intrinsic_rewards.append(np.sum(episode_intrinsic_rewards)) - if self.episode_number % 100 == 0: - print(" ") - print("Most common goal -- {} -- ".format( max(set(self.goals_seen[-100:]), key=self.goals_seen[-100:].count) )) - print("Intrinsic Rewards -- {} -- ".format(np.mean(self.rolling_intrinsic_rewards[-100:]))) - print("Average controller action -- {} ".format(np.mean(self.controller_actions[-100:]))) - print("Latest subgoal -- {}".format(self.goals_seen[-1])) - self.episode_number += 1 - self.controller.episode_number += 1 - self.meta_controller.episode_number += 1 - - def pick_and_conduct_controller_action(self): - """Picks and conducts an action for controller""" - self.action = self.controller.pick_action(state=self.state) - self.controller_actions.append(self.action) - self.conduct_action() - - def update_data(self): - """Updates stored data for controller and meta-controller. 
It must occur in the order shown""" - self.episode_over = self.environment.get_done() - self.update_controller_data() - self.update_meta_controller_data() - - def update_controller_data(self): - """Gets the next state, reward and done information from the environment""" - environment_next_state = self.environment.get_next_state() - assert environment_next_state.shape[0] == 1 - self.next_state = np.concatenate((environment_next_state, np.array([self.subgoal]))) - self.subgoal_achieved = environment_next_state[0] == self.subgoal - self.reward = 1.0 * self.subgoal_achieved - self.done = self.subgoal_achieved or self.episode_over - - def update_meta_controller_data(self): - """Updates data relating to meta controller""" - self.cumulative_meta_controller_reward += self.environment.get_reward() - self.total_episode_score_so_far += self.environment.get_reward() - if self.done: - self.meta_controller_next_state = self.environment.get_next_state() - - def time_to_learn(self, memory, steps_taken, controller_name): - """Boolean indicating whether it is time for meta-controller or controller to learn""" - enough_experiences = len(memory) > self.hyperparameters[controller_name]["batch_size"] - enough_steps_taken = steps_taken % self.hyperparameters[controller_name]["update_every_n_steps"] == 0 - return enough_experiences and enough_steps_taken diff --git a/agents/policy_gradient_agents/PPO.py b/agents/policy_gradient_agents/PPO.py deleted file mode 100644 index a1910101edd121ec84ea9e52652ec37e6abf9f87..0000000000000000000000000000000000000000 --- a/agents/policy_gradient_agents/PPO.py +++ /dev/null @@ -1,132 +0,0 @@ -import copy -import sys -import torch -import numpy as np -from torch import optim -from agents.Base_Agent import Base_Agent -from exploration_strategies.Epsilon_Greedy_Exploration import Epsilon_Greedy_Exploration -from utilities.Parallel_Experience_Generator import Parallel_Experience_Generator -from utilities.Utility_Functions import normalise_rewards, create_actor_distribution - -class PPO(Base_Agent): - """Proximal Policy Optimization agent""" - agent_name = "PPO" - - def __init__(self, config): - Base_Agent.__init__(self, config) - self.policy_output_size = self.calculate_policy_output_size() - self.policy_new = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size) - self.policy_old = self.create_NN(input_dim=self.state_size, output_dim=self.policy_output_size) - self.policy_old.load_state_dict(copy.deepcopy(self.policy_new.state_dict())) - self.policy_new_optimizer = optim.Adam(self.policy_new.parameters(), lr=self.hyperparameters["learning_rate"], eps=1e-4) - self.episode_number = 0 - self.many_episode_states = [] - self.many_episode_actions = [] - self.many_episode_rewards = [] - self.experience_generator = Parallel_Experience_Generator(self.environment, self.policy_new, self.config.seed, - self.hyperparameters, self.action_size) - self.exploration_strategy = Epsilon_Greedy_Exploration(self.config) - - def calculate_policy_output_size(self): - """Initialises the policies""" - if self.action_types == "DISCRETE": - return self.action_size - elif self.action_types == "CONTINUOUS": - return self.action_size * 2 #Because we need 1 parameter for mean and 1 for std of distribution - - def step(self): - """Runs a step for the PPO agent""" - exploration_epsilon = self.exploration_strategy.get_updated_epsilon_exploration({"episode_number": self.episode_number}) - self.many_episode_states, self.many_episode_actions, self.many_episode_rewards = 
self.experience_generator.play_n_episodes( - self.hyperparameters["episodes_per_learning_round"], exploration_epsilon) - self.episode_number += self.hyperparameters["episodes_per_learning_round"] - self.policy_learn() - self.update_learning_rate(self.hyperparameters["learning_rate"], self.policy_new_optimizer) - self.equalise_policies() - - def policy_learn(self): - """A learning iteration for the policy""" - all_discounted_returns = self.calculate_all_discounted_returns() - if self.hyperparameters["normalise_rewards"]: - all_discounted_returns = normalise_rewards(all_discounted_returns) - for _ in range(self.hyperparameters["learning_iterations_per_round"]): - all_ratio_of_policy_probabilities = self.calculate_all_ratio_of_policy_probabilities() - loss = self.calculate_loss([all_ratio_of_policy_probabilities], all_discounted_returns) - self.take_policy_new_optimisation_step(loss) - - def calculate_all_discounted_returns(self): - """Calculates the cumulative discounted return for each episode which we will then use in a learning iteration""" - all_discounted_returns = [] - for episode in range(len(self.many_episode_states)): - discounted_returns = [0] - for ix in range(len(self.many_episode_states[episode])): - return_value = self.many_episode_rewards[episode][-(ix + 1)] + self.hyperparameters["discount_rate"]*discounted_returns[-1] - discounted_returns.append(return_value) - discounted_returns = discounted_returns[1:] - all_discounted_returns.extend(discounted_returns[::-1]) - return all_discounted_returns - - def calculate_all_ratio_of_policy_probabilities(self): - """For each action calculates the ratio of the probability that the new policy would have picked the action vs. - the probability the old policy would have picked it. This will then be used to inform the loss""" - all_states = [state for states in self.many_episode_states for state in states] - all_actions = [[action] if self.action_types == "DISCRETE" else action for actions in self.many_episode_actions for action in actions ] - all_states = torch.stack([torch.Tensor(states).float().to(self.device) for states in all_states]) - - all_actions = torch.stack([torch.Tensor(actions).float().to(self.device) for actions in all_actions]) - all_actions = all_actions.view(-1, len(all_states)) - - new_policy_distribution_log_prob = self.calculate_log_probability_of_actions(self.policy_new, all_states, all_actions) - old_policy_distribution_log_prob = self.calculate_log_probability_of_actions(self.policy_old, all_states, all_actions) - ratio_of_policy_probabilities = torch.exp(new_policy_distribution_log_prob) / (torch.exp(old_policy_distribution_log_prob) + 1e-8) - return ratio_of_policy_probabilities - - def calculate_log_probability_of_actions(self, policy, states, actions): - """Calculates the log probability of an action occuring given a policy and starting state""" - policy_output = policy.forward(states).to(self.device) - policy_distribution = create_actor_distribution(self.action_types, policy_output, self.action_size) - policy_distribution_log_prob = policy_distribution.log_prob(actions) - return policy_distribution_log_prob - - def calculate_loss(self, all_ratio_of_policy_probabilities, all_discounted_returns): - """Calculates the PPO loss""" - all_ratio_of_policy_probabilities = torch.squeeze(torch.stack(all_ratio_of_policy_probabilities)) - all_ratio_of_policy_probabilities = torch.clamp(input=all_ratio_of_policy_probabilities, - min = -sys.maxsize, - max = sys.maxsize) - all_discounted_returns = 
torch.tensor(all_discounted_returns).to(all_ratio_of_policy_probabilities) - potential_loss_value_1 = all_discounted_returns * all_ratio_of_policy_probabilities - potential_loss_value_2 = all_discounted_returns * self.clamp_probability_ratio(all_ratio_of_policy_probabilities) - loss = torch.min(potential_loss_value_1, potential_loss_value_2) - loss = -torch.mean(loss) - return loss - - def clamp_probability_ratio(self, value): - """Clamps a value between a certain range determined by hyperparameter clip epsilon""" - return torch.clamp(input=value, min=1.0 - self.hyperparameters["clip_epsilon"], - max=1.0 + self.hyperparameters["clip_epsilon"]) - - def take_policy_new_optimisation_step(self, loss): - """Takes an optimisation step for the new policy""" - self.policy_new_optimizer.zero_grad() # reset gradients to 0 - loss.backward() # this calculates the gradients - torch.nn.utils.clip_grad_norm_(self.policy_new.parameters(), self.hyperparameters[ - "gradient_clipping_norm"]) # clip gradients to help stabilise training - self.policy_new_optimizer.step() # this applies the gradients - - def equalise_policies(self): - """Sets the old policy's parameters equal to the new policy's parameters""" - for old_param, new_param in zip(self.policy_old.parameters(), self.policy_new.parameters()): - old_param.data.copy_(new_param.data) - - def save_result(self): - """Save the results seen by the agent in the most recent experiences""" - for ep in range(len(self.many_episode_rewards)): - total_reward = np.sum(self.many_episode_rewards[ep]) - self.game_full_episode_scores.append(total_reward) - self.rolling_results.append(np.mean(self.game_full_episode_scores[-1 * self.rolling_score_window:])) - self.save_max_result_seen() - - def locally_save_policy(self): - """Saves the policy""" - torch.save(self.policy_new.state_dict(), "{}/{}_network.pt".format(self.config.models_dir, self.agent_name)) diff --git a/agents/policy_gradient_agents/REINFORCE.py b/agents/policy_gradient_agents/REINFORCE.py deleted file mode 100644 index 2b15bedf35124f0fee7e36d0c3f4345e1a99d011..0000000000000000000000000000000000000000 --- a/agents/policy_gradient_agents/REINFORCE.py +++ /dev/null @@ -1,95 +0,0 @@ -import numpy as np -import torch -import torch.optim as optim -from torch.distributions import Categorical -from agents.Base_Agent import Base_Agent - -class REINFORCE(Base_Agent): - agent_name = "REINFORCE" - def __init__(self, config): - Base_Agent.__init__(self, config) - self.policy = self.create_NN(input_dim=self.state_size, output_dim=self.action_size) - self.optimizer = optim.Adam(self.policy.parameters(), lr=self.hyperparameters["learning_rate"]) - self.episode_rewards = [] - self.episode_log_probabilities = [] - - def reset_game(self): - """Resets the game information so we are ready to play a new episode""" - self.state = self.environment.reset() - self.next_state = None - self.action = None - self.reward = None - self.done = False - self.total_episode_score_so_far = 0 - self.episode_rewards = [] - self.episode_log_probabilities = [] - self.episode_step_number = 0 - - def step(self): - """Runs a step within a game including a learning step if required""" - while not self.done: - self.pick_and_conduct_action_and_save_log_probabilities() - self.update_next_state_reward_done_and_score() - self.store_reward() - if self.time_to_learn(): - self.actor_learn() - self.state = self.next_state #this is to set the state for the next iteration - self.episode_step_number += 1 - self.episode_number += 1 - - def 
pick_and_conduct_action_and_save_log_probabilities(self): - """Picks and then conducts actions. Then saves the log probabilities of the actions it conducted to be used for - learning later""" - action, log_probabilities = self.pick_action_and_get_log_probabilities() - self.store_log_probabilities(log_probabilities) - self.store_action(action) - self.conduct_action(action) - - def pick_action_and_get_log_probabilities(self): - """Picks actions and then calculates the log probabilities of the actions it picked given the policy""" - # PyTorch only accepts mini-batches and not individual observations so we have to add - # a "fake" dimension to our observation using unsqueeze - state = torch.from_numpy(self.state).float().unsqueeze(0).to(self.device) - action_probabilities = self.policy.forward(state).cpu() - action_distribution = Categorical(action_probabilities) # this creates a distribution to sample from - action = action_distribution.sample() - return action.item(), action_distribution.log_prob(action) - - def store_log_probabilities(self, log_probabilities): - """Stores the log probabilities of picked actions to be used for learning later""" - self.episode_log_probabilities.append(log_probabilities) - - def store_action(self, action): - """Stores the action picked""" - self.action = action - - def store_reward(self): - """Stores the reward picked""" - self.episode_rewards.append(self.reward) - - def actor_learn(self): - """Runs a learning iteration for the policy""" - total_discounted_reward = self.calculate_episode_discounted_reward() - policy_loss = self.calculate_policy_loss_on_episode(total_discounted_reward) - self.optimizer.zero_grad() - policy_loss.backward() - self.optimizer.step() - - def calculate_episode_discounted_reward(self): - """Calculates the cumulative discounted return for the episode""" - discounts = self.hyperparameters["discount_rate"] ** np.arange(len(self.episode_rewards)) - total_discounted_reward = np.dot(discounts, self.episode_rewards) - return total_discounted_reward - - def calculate_policy_loss_on_episode(self, total_discounted_reward): - """Calculates the loss from an episode""" - policy_loss = [] - for log_prob in self.episode_log_probabilities: - policy_loss.append(-log_prob * total_discounted_reward) - policy_loss = torch.cat(policy_loss).sum() # We need to add up the losses across the mini-batch to get 1 overall loss - return policy_loss - - def time_to_learn(self): - """Tells us whether it is time for the algorithm to learn. 
With REINFORCE we only learn at the end of every - episode so this just returns whether the episode is over""" - return self.done diff --git a/agents/policy_gradient_agents/__pycache__/PPO.cpython-310.pyc b/agents/policy_gradient_agents/__pycache__/PPO.cpython-310.pyc deleted file mode 100644 index 0c0d43590e8c325928eaccdca5a52c2bdee5acd2..0000000000000000000000000000000000000000 Binary files a/agents/policy_gradient_agents/__pycache__/PPO.cpython-310.pyc and /dev/null differ diff --git a/agents/policy_gradient_agents/__pycache__/PPO.cpython-39.pyc b/agents/policy_gradient_agents/__pycache__/PPO.cpython-39.pyc deleted file mode 100644 index b9fc84f63185a2fc529e8f23acf0a4074c14e0a6..0000000000000000000000000000000000000000 Binary files a/agents/policy_gradient_agents/__pycache__/PPO.cpython-39.pyc and /dev/null differ diff --git a/agents/policy_gradient_agents/__pycache__/REINFORCE.cpython-39.pyc b/agents/policy_gradient_agents/__pycache__/REINFORCE.cpython-39.pyc deleted file mode 100644 index ee601f14b3785691bfac3c9a6e54149a81182409..0000000000000000000000000000000000000000 Binary files a/agents/policy_gradient_agents/__pycache__/REINFORCE.cpython-39.pyc and /dev/null differ diff --git a/antiJamming_v1.py b/antiJamming_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..78128630e9d7e68c6a7f1c5352c7879ec1a8047f --- /dev/null +++ b/antiJamming_v1.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import gym +import tensorflow as tf +import tf_slim as slim +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt +import json +from tensorflow import keras +from ns3gym import ns3env + +env = gym.make('ns3-v0') +ob_space = env.observation_space +ac_space = env.action_space +print("Observation space: ", ob_space, ob_space.dtype) +print("Action space: ", ac_space, ac_space.n) + +s_size = ob_space.shape[0] +a_size = ac_space.n +jammerType = 'combined' + +model = keras.Sequential() +model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='relu')) +model.add(keras.layers.Dense(32, activation='relu')) +model.add(keras.layers.Dense(a_size, activation='softmax')) +model.compile(optimizer=tf.optimizers.Adam(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + +total_episodes = 1 +max_env_steps = 1 +env._max_episode_steps = max_env_steps + +epsilon = 1.0 # exploration rate +epsilon_min = 0.01 +epsilon_decay = 0.99 + +time_history = [] +rew_history = [] + +# Training agent +for e in range(total_episodes): + + state = env.reset() + state = np.reshape(state, [1, s_size]) + rewardsum = 0 + for time in range(max_env_steps): + # Choose action + if np.random.rand(1) < epsilon: + action = np.random.randint(a_size) + else: + action = np.argmax(model.predict(state)[0]) + + # Step + next_state, reward, done, _ = env.step(action) + + if done or time == max_env_steps - 1: + print("episode: {}/{}, time: {}, rew: {}, eps: {:.2}" + .format(e, total_episodes, time, rewardsum, epsilon)) + break + + next_state = np.reshape(next_state, [1, s_size]) + + # Train + target = reward + if not done: + target = (reward + 0.95 * np.amax(model.predict(next_state)[0])) + + target_f = model.predict(state) + target_f[0][action] = target + model.fit(state, target_f, epochs=1, verbose=0) + + state = next_state + rewardsum += reward + if epsilon > epsilon_min: epsilon *= epsilon_decay + + time_history.append(time) + rew_history.append(rewardsum) + # Implementing early break + +# Plotting Learning Performance +print("Plot Learning Performance") 
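# The two curves below show steps taken per episode (time_history) and cumulative reward
# per episode (rew_history), both recorded during the training loop above.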
+mpl.rcdefaults() +mpl.rcParams.update({'font.size': 16}) + +fig, ax = plt.subplots(figsize=(10, 4)) +plt.grid(True, linestyle='--') +plt.title('Learning Performance') +plt.plot(range(len(time_history)), time_history, label='Steps', marker="^", linestyle=":") # , color='red') +plt.plot(range(len(rew_history)), rew_history, label='Reward', marker="", linestyle="-") # , color='k') +plt.xlabel('Episode') +plt.ylabel('Time') +plt.legend(prop={'size': 12}) + +plt.savefig('learning.pdf', bbox_inches='tight') +plt.show() + +# for n in range(2 ** s_size): +# state = [n >> i & 1 for i in range(0, 2)] +# state = np.reshape(state, [1, s_size]) +# print("state " + str(state) +# + " -> prediction " + str(model.predict(state)[0]) +# ) + +# Testing agent +n_runs = 1 +total_trans_pkts = 0 + +for run in range(n_runs): + state = env.reset() + state = np.reshape(state, [1, s_size]) + total_trans_pkts_per_run = 0 + for time in range(max_env_steps): + # Choose Channel + action = np.argmax(model.predict(state)[0]) + # Step + next_state, reward, done, _ = env.step(action) + total_trans_pkts_per_run += reward + if done or time == max_env_steps - 1: + break + next_state = np.reshape(next_state, [1, s_size]) + # Test + state = next_state + + print(f"Run: {run}/{n_runs}, Total transferred packets: {total_trans_pkts_per_run}") + total_trans_pkts += total_trans_pkts_per_run + +# print(model.get_config()) +# print(model.to_json()) +# print(model.get_weights()) + +# Save Results for this time slots value +normalizedThroughput = total_trans_pkts / (100 * n_runs) +print(f'The normalized throughput is: {normalizedThroughput}') +filename = f'{jammerType}_timeSlots_{max_env_steps}.json' +with open(filename, 'w') as f: + json.dump(normalizedThroughput, f) diff --git a/app.py b/app.py index 820e21e8f194acb34fec68727114346dd6c3e704..926756fd86376968bc7407793d01f3a85d90f382 100644 --- a/app.py +++ b/app.py @@ -18,5 +18,11 @@ def main(): st.write(f"Agent Type: {agent_type}") st.write(f"Channel Switching Cost: {channel_switching_cost}") + st.write("==================================================") + st.write("Training Starting") + st.write("Training completed") + st.write("==================================================") + st.write("") + if __name__ == "__main__": main() diff --git a/data/__init__.py b/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/data/__pycache__/__init__.cpython-310.pyc b/data/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 4f0507b5ae1f10dfc554d188a9acee216ef0db4a..0000000000000000000000000000000000000000 Binary files a/data/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/data/__pycache__/__init__.cpython-39.pyc b/data/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index a47b7ca23df42b3053cc7521634923266947ab28..0000000000000000000000000000000000000000 Binary files a/data/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/data/__pycache__/dataset.cpython-310.pyc b/data/__pycache__/dataset.cpython-310.pyc deleted file mode 100644 index fbf7bb704d276417fefb79096ba21e8c4dc262eb..0000000000000000000000000000000000000000 Binary files a/data/__pycache__/dataset.cpython-310.pyc and /dev/null differ diff --git a/data/__pycache__/dataset.cpython-39.pyc b/data/__pycache__/dataset.cpython-39.pyc deleted file mode 100644 index 3fa61b51f6ff0e04e8e5fe34d37b63a3e54e6c51..0000000000000000000000000000000000000000 Binary files 
a/data/__pycache__/dataset.cpython-39.pyc and /dev/null differ diff --git a/data/dataset.py b/data/dataset.py deleted file mode 100644 index 91b12ac8886fa9a37a5fb11e0758a45972fda853..0000000000000000000000000000000000000000 --- a/data/dataset.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python -# coding:utf-8 -""" -name : dataset.py, -version : 1.0.0, -url : https://github.com/abubakar-sani/RFF, -license : MIT License, -copyright : Copyright 2021 by Abubakar Sani Ali, Khalifa University, -author : Abubakar Sani Ali, -email : engrabubakarsani@gmail.com, -date : 9/5/2022, -description : Dataset module that returns the processed data, -""" - -import math -import os -import numpy as np -import pandas as pd - -import json - -base_dir = os.path.abspath('') -raw_dir = f'{base_dir}/datasets/raw/spectral_scans_QC9880_ht20_background' -processed_dir = f'{base_dir}/datasets/processed/spectral_scans_QC9880_ht20_background' - - -def load_spectral_scans(jammer, jammed_freq, jammer_dist, jamming_power, channels, n_features, n_scans=10): - if jammer == 'combined': - scenario = f'samples_chamber_{jammer_dist}cm_{jamming_power}dBm' - elif jammer == 'none': - interference = np.random.choice(['high', 'medium', 'low'], p=[0.4, 0.4, 0.2]) - if interference == 'high': - scenario = f'samples_office_None' - elif interference == 'low': - scenario = f'samples_lab_None' - else: - scenario = f'samples_chamber_None' - else: - scenario = f'samples_chamber_{jammed_freq}MHz_{jammer_dist}cm_{jamming_power}dBm' - # Process the dataset and generate the raw numpy (to processing it every run) - if not os.path.isfile(f'{processed_dir}/{scenario}.npy'): - spectral_data = [] - for channel in channels: - channel_data = np.empty((0, n_features), int) - for scan in range(n_scans): - if jammer == 'combined': - scenario = f'samples_chamber_{channel}MHz_{jammer_dist}cm_{jamming_power}dBm' - - df = pd.read_csv(f'{raw_dir}/{scenario}_{scan}.csv') - temp_data = df.where(df['freq1'] == channel).dropna() - temp_data = temp_data.to_numpy() - channel_data = np.append(channel_data, temp_data, axis=0) - - spectral_data.append(channel_data) - - if not os.path.exists(processed_dir): - os.makedirs(processed_dir) - if jammer == 'combined': - scenario = f'samples_chamber_{jammer_dist}cm_{jamming_power}dBm' - np.save(f'{processed_dir}/{scenario}', np.array(spectral_data, dtype='object')) - - spectral_data = np.load(f'{processed_dir}/{scenario}.npy', allow_pickle=True) - - return spectral_data - - -def get_feats_dirs(scenario, scan=1): - spectral_df = pd.read_csv(f'{raw_dir}/{scenario}_{scan}.csv') - features = spectral_df.columns - n_features = len(features) - return features, n_features - - -def process_data(data, channels, length, stride, time_step, mode=0): - if mode == 0: - min_len = len(data[0]) - for i in range(len(data)): - min_len = len(data[i]) if len(data[i]) < min_len else min_len - t_samples = min_len - else: - t_samples = len(data) - n_samples = math.floor((t_samples - length) / stride) - if mode == 1: - # Breaking data into batches - data = data[0:t_samples] - batches = [] - for sample in range(n_samples): - batch = [] - for i in range(length): - batch_sample = data[(sample * stride) + i] - batch.append(batch_sample) - batches.append(batch) - - else: - batches = [] - for sample in range(n_samples): - batch = [] - for i in range(len(channels)): - channel_data = data[i] - channel_samples = channel_data[(sample * stride):(length + (sample * stride)), :] - batch.append(channel_samples) - batches.append(batch) - - processed_data = 
np.array(batches[time_step % len(batches)]) - - return processed_data diff --git a/environments/RF_spectrum.py b/environments/RF_spectrum.py deleted file mode 100644 index 7461e8165326970334f4919f54dd28d0cca43048..0000000000000000000000000000000000000000 --- a/environments/RF_spectrum.py +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/env python -# coding:utf-8 -""" -name : RF_spectrum.py, -version : 1.0.0, -url : https://github.com/abubakar-sani/Anti-Jam, -license : MIT License, -copyright : Copyright 2022 by Abubakar Sani Ali, -author : Abubakar Sani Ali, -email : engrabubakarsani@gmail.com, -date : 9/9/2022, -description : Environment class for the anti-jamming problem, -""" - -# %% Loading libraries -import os -import numpy as np -import pandas as pd -import gym -from gym import spaces -from gym.utils import seeding -from random import randint -from matplotlib import pyplot -import matplotlib as mpl -import copy -import random -from data.dataset import load_spectral_scans, process_data, get_feats_dirs - - -class RfEnvironment(gym.Env): - environment_name = "Anti Jamming" - - def __init__(self, jammers, jammer_dist, jamming_power, csc, channels, length, stride, n_scans=10): - self.selected_freq_prob = None - self.reward = None - self.jammed_freq = None - self.jammer = None - self.sweep_counter = None - self.channel_data = None - self.spectral_data = None - self.state = None - self.cstime = 0 - self.csc = csc - self.interference_types = ['high', 'medium', 'low'] - self.jammers = jammers - self.jammer_dists = [20] - self.jammer_dist = jammer_dist - self.jamming_powers = [10] - self.jamming_power = jamming_power - self.jammer_bandwidth = 20 # MHz - self.n_scans = n_scans - self.length = length - self.time_step = 1 - self.stride = stride # if self.mode == 1 else self.length - self.channels = channels - self.freq = self.channels[0] - self.previous_action = 0 - self.scenario = f'samples_chamber_{self.freq}MHz_{self.jammer_dist}cm_{self.jamming_power}dBm' - self.features, self.n_features = get_feats_dirs(self.scenario) - self.stat_features = ['mean'] - - self.average_rssi = 0 - - self.action_space = spaces.Discrete(len(self.channels)) - - self.observation_size = len(self.channels) # * (1 + (self.n_features - 1) * len(self.stat_features)) - self.observation_space = spaces.Box(low=np.ones(self.observation_size) * -100, - high=np.ones(self.observation_size) * 100) - # self.seed() - self.reward_RF = 1 - self.n_collisions = 0 - self.trials = 5 - self._max_episode_steps = 5 - self.reward_threshold = 0.95 * self._max_episode_steps - self.id = "Dynamic Anti-jamming" if len(jammers) > 1 else f"{self.jammers[0]} Anti-jamming" - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def reset(self): - """ - Here we reshuffle our dataset and then obtain the RF spectrum scans at the beginning of an episode - - """ - # Getting data - self.step_count = 0 - self.jammer = np.random.choice(self.jammers) - self.jammer_dist = np.random.choice(self.jammer_dists) - self.jamming_power = np.random.choice(self.jamming_powers) - - if self.jammer == 'constant': - self.jammed_freq = np.random.choice(self.channels) - elif self.jammer == 'sweeping': - self.sweep_counter = 0 - self.jammed_freq = self.freq - elif self.jammer == 'random': - self.jammed_freq = np.random.choice(self.channels) - else: - self.jammed_freq = self.channels[0] - - self.time_step = np.random.randint(100) # Introducing randomisation at the start - - self.spectral_data = load_spectral_scans(self.jammer, 
self.jammed_freq, self.jammer_dist, self.jamming_power, - self.channels, self.n_features) - self.state = self.get_state( - process_data(self.spectral_data, self.channels, self.length, self.stride, self.time_step)) - - # self.state = np.eye(len(self.channels))[self.channels.index(self.jammed_freq)] - - return self.state.flatten() - - def step(self, action): - # Get reward of previous action taken (r_t) - self.step_count += 1 - self.freq = self.channels[action] - if action != self.previous_action: self.cstime = 1 - - # When the agent transmits and no jammer - if self.jammer == 'none': - self.reward = self.get_reward(action) - - if self.freq > (self.jammed_freq + self.jammer_bandwidth/2) or self.freq < (self.jammed_freq - self.jammer_bandwidth/2): - self.reward = self.get_reward(action) - else: - # There is collision - self.reward = 0 - self.n_collisions += 1 - - self.previous_action = action - - # Go to next state (s_t+1) - if self.jammer == 'sweeping': - self.sweep_counter = self.sweep_counter + 1 - sweep_slot = self.sweep_counter % len(self.channels) - self.jammed_freq = self.channels[sweep_slot] - elif self.jammer == 'random': - self.jammed_freq = np.random.choice(self.channels) - - self.spectral_data = load_spectral_scans(self.jammer, self.jammed_freq, self.jammer_dist, self.jamming_power, - self.channels, self.n_features) - - self.state = self.get_state( - process_data(self.spectral_data, self.channels, self.length, self.stride, self.time_step)) - - self.time_step = self.time_step + 1 - # self.state = np.eye(len(self.channels))[self.channels.index(self.jammed_freq)] - if self.step_count >= self._max_episode_steps: - self.done = True - else: - self.done = False - - return self.state.flatten(), self.reward, self.done, self.cstime - - def get_state(self, processed_channel_data): - state = np.zeros((len(self.channels), 1)) - for channel in range(len(self.channels)): - channel_state = self.construct_state(processed_channel_data[channel]) - # freq = self.channels[channel] - # if freq > (self.jammed_freq + self.jammer_bandwidth/2) or freq < (self.jammed_freq - self.jammer_bandwidth/2): - # inf_data = self.get_interference_data(channel) - # channel_state = self.construct_state(inf_data) - # else: - # channel_state = self.construct_state(processed_channel_data[channel]) - state[channel, :] = channel_state - return state - - def construct_state(self, processed_channel_data): - df_channel = pd.DataFrame(processed_channel_data, columns=self.features) - df_channel[df_channel['snr'] < -100] = np.NaN - df_channel[df_channel['snr'] > 100] = np.NaN - df_channel.fillna(df_channel['snr'].mean(), inplace=True) - state = df_channel['snr'].mean() - return state - - def get_interference_data(self, channel): - jammer = 'none' - inf_spectral_data = load_spectral_scans(jammer, self.jammed_freq, self.jammer_dist, self.jamming_power, - self.channels, self.n_features) - data_index = np.where(np.array(self.channels) == np.array(self.channels)[channel]) - inf_channel_data = inf_spectral_data[data_index[0][0]] - inf_data = process_data(inf_channel_data, self.channels, self.length, self.stride, self.time_step, 1) - return inf_data - - def get_reward(self, action): - # Penalize agent for switching channel if the channel is not jammed - return self.reward_RF * (1 - self.csc) if action != self.previous_action else self.reward_RF - - def get_score_to_win(self): - return self.reward_threshold diff --git a/environments/__pycache__/RF_spectrum.cpython-310.pyc b/environments/__pycache__/RF_spectrum.cpython-310.pyc deleted file 
mode 100644 index 670378a3bd0eb94173e9af970efd63852f9ea7cd..0000000000000000000000000000000000000000 Binary files a/environments/__pycache__/RF_spectrum.cpython-310.pyc and /dev/null differ diff --git a/environments/__pycache__/RF_spectrum.cpython-39.pyc b/environments/__pycache__/RF_spectrum.cpython-39.pyc deleted file mode 100644 index 671d17316b8cca86d8e9e8962da7c41e4cf26dd8..0000000000000000000000000000000000000000 Binary files a/environments/__pycache__/RF_spectrum.cpython-39.pyc and /dev/null differ diff --git a/exploration_strategies/Base_Exploration_Strategy.py b/exploration_strategies/Base_Exploration_Strategy.py deleted file mode 100644 index a8041d3ba21c327480b26808956f0358a93e7b3d..0000000000000000000000000000000000000000 --- a/exploration_strategies/Base_Exploration_Strategy.py +++ /dev/null @@ -1,19 +0,0 @@ - - -class Base_Exploration_Strategy(object): - """Base abstract class for agent exploration strategies. Every exploration strategy must inherit from this class - and implement the methods perturb_action_for_exploration_purposes and add_exploration_rewards""" - def __init__(self, config): - self.config = config - - def perturb_action_for_exploration_purposes(self, action_info): - """Perturbs the action of the agent to encourage exploration""" - raise ValueError("Must be implemented") - - def add_exploration_rewards(self, reward_info): - """Actions intrinsic rewards to encourage exploration""" - raise ValueError("Must be implemented") - - def reset(self): - """Resets the noise process""" - raise ValueError("Must be implemented") \ No newline at end of file diff --git a/exploration_strategies/Epsilon_Greedy_Exploration.py b/exploration_strategies/Epsilon_Greedy_Exploration.py deleted file mode 100644 index 98316c5e29a1be51fa370bd7a34da301a6e5f667..0000000000000000000000000000000000000000 --- a/exploration_strategies/Epsilon_Greedy_Exploration.py +++ /dev/null @@ -1,68 +0,0 @@ -from exploration_strategies.Base_Exploration_Strategy import Base_Exploration_Strategy -import numpy as np -import random -import torch - -class Epsilon_Greedy_Exploration(Base_Exploration_Strategy): - """Implements an epsilon greedy exploration strategy""" - def __init__(self, config): - super().__init__(config) - self.notified_that_exploration_turned_off = False - if "exploration_cycle_episodes_length" in self.config.hyperparameters.keys(): - print("Using a cyclical exploration strategy") - self.exploration_cycle_episodes_length = self.config.hyperparameters["exploration_cycle_episodes_length"] - else: - self.exploration_cycle_episodes_length = None - - if "random_episodes_to_run" in self.config.hyperparameters.keys(): - self.random_episodes_to_run = self.config.hyperparameters["random_episodes_to_run"] - print("Running {} random episodes".format(self.random_episodes_to_run)) - else: - self.random_episodes_to_run = 0 - - def perturb_action_for_exploration_purposes(self, action_info): - """Perturbs the action of the agent to encourage exploration""" - action_values = action_info["action_values"] - turn_off_exploration = action_info["turn_off_exploration"] - episode_number = action_info["episode_number"] - if turn_off_exploration and not self.notified_that_exploration_turned_off: - print(" ") - print("Exploration has been turned OFF") - print(" ") - self.notified_that_exploration_turned_off = True - epsilon = self.get_updated_epsilon_exploration(action_info) - - - if (random.random() > epsilon or turn_off_exploration) and (episode_number >= self.random_episodes_to_run): - return 
torch.argmax(action_values).item() - return np.random.randint(0, action_values.shape[1]) - - def get_updated_epsilon_exploration(self, action_info, epsilon=1.0): - """Gets the probability that we just pick a random action. This probability decays the more episodes we have seen""" - episode_number = action_info["episode_number"] - epsilon_decay_denominator = self.config.hyperparameters["epsilon_decay_rate_denominator"] - - if self.exploration_cycle_episodes_length is None: - epsilon = epsilon / (1.0 + (episode_number / epsilon_decay_denominator)) - else: - epsilon = self.calculate_epsilon_with_cyclical_strategy(episode_number) - return epsilon - - def calculate_epsilon_with_cyclical_strategy(self, episode_number): - """Calculates epsilon according to a cyclical strategy""" - max_epsilon = 0.5 - min_epsilon = 0.001 - increment = (max_epsilon - min_epsilon) / float(self.exploration_cycle_episodes_length / 2) - cycle = [ix for ix in range(int(self.exploration_cycle_episodes_length / 2))] + [ix for ix in range( - int(self.exploration_cycle_episodes_length / 2), 0, -1)] - cycle_ix = episode_number % self.exploration_cycle_episodes_length - epsilon = max_epsilon - cycle[cycle_ix] * increment - return epsilon - - def add_exploration_rewards(self, reward_info): - """Actions intrinsic rewards to encourage exploration""" - return reward_info["reward"] - - def reset(self): - """Resets the noise process""" - pass diff --git a/exploration_strategies/Gaussian_Exploration.py b/exploration_strategies/Gaussian_Exploration.py deleted file mode 100644 index c8186ee977232c227f941866ea2e8efff93e973c..0000000000000000000000000000000000000000 --- a/exploration_strategies/Gaussian_Exploration.py +++ /dev/null @@ -1,32 +0,0 @@ -from exploration_strategies.Base_Exploration_Strategy import Base_Exploration_Strategy -import torch -from torch.distributions.normal import Normal - -class Gaussian_Exploration(Base_Exploration_Strategy): - - """Gaussian noise exploration strategy""" - def __init__(self, config): - super().__init__(config) - self.action_noise_std = self.config.hyperparameters["action_noise_std"] - self.action_noise_distribution = Normal(torch.Tensor([0.0]), torch.Tensor([self.action_noise_std])) - self.action_noise_clipping_range = self.config.hyperparameters["action_noise_clipping_range"] - - - def perturb_action_for_exploration_purposes(self, action_info): - """Perturbs the action of the agent to encourage exploration""" - action = action_info["action"] - action_noise = self.action_noise_distribution.sample(sample_shape=action.shape) - action_noise = action_noise.squeeze(-1) - clipped_action_noise = torch.clamp(action_noise, min=-self.action_noise_clipping_range, - max=self.action_noise_clipping_range) - action += clipped_action_noise - return action - - def add_exploration_rewards(self, reward_info): - """Actions intrinsic rewards to encourage exploration""" - raise ValueError("Must be implemented") - - def reset(self): - """Resets the noise process""" - pass - diff --git a/exploration_strategies/OU_Noise_Exploration.py b/exploration_strategies/OU_Noise_Exploration.py deleted file mode 100644 index 26a55b00f4614fb087be1f16ec6f2cb10c1850a1..0000000000000000000000000000000000000000 --- a/exploration_strategies/OU_Noise_Exploration.py +++ /dev/null @@ -1,23 +0,0 @@ -from utilities.OU_Noise import OU_Noise -from exploration_strategies.Base_Exploration_Strategy import Base_Exploration_Strategy - -class OU_Noise_Exploration(Base_Exploration_Strategy): - """Ornstein-Uhlenbeck noise process exploration 
strategy""" - def __init__(self, config): - super().__init__(config) - self.noise = OU_Noise(self.config.action_size, self.config.seed, self.config.hyperparameters["mu"], - self.config.hyperparameters["theta"], self.config.hyperparameters["sigma"]) - - def perturb_action_for_exploration_purposes(self, action_info): - """Perturbs the action of the agent to encourage exploration""" - action = action_info["action"] - action += self.noise.sample() - return action - - def add_exploration_rewards(self, reward_info): - """Actions intrinsic rewards to encourage exploration""" - raise ValueError("Must be implemented") - - def reset(self): - """Resets the noise process""" - self.noise.reset() \ No newline at end of file diff --git a/exploration_strategies/__init__.py b/exploration_strategies/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/exploration_strategies/__pycache__/Base_Exploration_Strategy.cpython-310.pyc b/exploration_strategies/__pycache__/Base_Exploration_Strategy.cpython-310.pyc deleted file mode 100644 index 1e9d2bfb1323b86b747d0814471b915fafafb1e0..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/Base_Exploration_Strategy.cpython-310.pyc and /dev/null differ diff --git a/exploration_strategies/__pycache__/Base_Exploration_Strategy.cpython-39.pyc b/exploration_strategies/__pycache__/Base_Exploration_Strategy.cpython-39.pyc deleted file mode 100644 index 8613abcbe4546e9463ddddcf857b5b34b24018d8..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/Base_Exploration_Strategy.cpython-39.pyc and /dev/null differ diff --git a/exploration_strategies/__pycache__/Epsilon_Greedy_Exploration.cpython-310.pyc b/exploration_strategies/__pycache__/Epsilon_Greedy_Exploration.cpython-310.pyc deleted file mode 100644 index 4ee44f457938953cce75c02bc6c9dfaec37751a2..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/Epsilon_Greedy_Exploration.cpython-310.pyc and /dev/null differ diff --git a/exploration_strategies/__pycache__/Epsilon_Greedy_Exploration.cpython-39.pyc b/exploration_strategies/__pycache__/Epsilon_Greedy_Exploration.cpython-39.pyc deleted file mode 100644 index a9f381ade691b9b2418d837cf61bb464268533b7..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/Epsilon_Greedy_Exploration.cpython-39.pyc and /dev/null differ diff --git a/exploration_strategies/__pycache__/OU_Noise_Exploration.cpython-39.pyc b/exploration_strategies/__pycache__/OU_Noise_Exploration.cpython-39.pyc deleted file mode 100644 index 5b3f24d3a6d6a1fcd2119149ca8eaa48751682ec..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/OU_Noise_Exploration.cpython-39.pyc and /dev/null differ diff --git a/exploration_strategies/__pycache__/__init__.cpython-310.pyc b/exploration_strategies/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index af6ed67120230fe80b24de58e23fb29be09a1263..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/exploration_strategies/__pycache__/__init__.cpython-39.pyc b/exploration_strategies/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 942cbb86cccd1f678dd2714e9c69dd15b3700998..0000000000000000000000000000000000000000 Binary files a/exploration_strategies/__pycache__/__init__.cpython-39.pyc and 
/dev/null differ diff --git a/models/exp_0.05_constant/DDQN with Prioritised Replay_network.pt b/models/exp_0.05_constant/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index c5791d022a3a7a479b1e57554023955518dabef3..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_constant/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.05_constant/DDQN_network.pt b/models/exp_0.05_constant/DDQN_network.pt deleted file mode 100644 index 143c66bb3f788f8ef01d050b4105c1c23649024b..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_constant/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_constant/DQN with Fixed Q Targets_network.pt b/models/exp_0.05_constant/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 38603d33efec057ed32ea7c4c10a556c03e92548..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_constant/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.05_constant/DQN_network.pt b/models/exp_0.05_constant/DQN_network.pt deleted file mode 100644 index faeac15a06eba5e8d7cfbf242c7f0116176a84cc..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_constant/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_constant/Dueling DDQN_network.pt b/models/exp_0.05_constant/Dueling DDQN_network.pt deleted file mode 100644 index ef8692ce10eaa4d966f9a43829397fd1b250c641..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_constant/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_dynamic/DDQN with Prioritised Replay_network.pt b/models/exp_0.05_dynamic/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 731b2842d6699b5eb8bbce52e58de8a3a29262e5..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_dynamic/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.05_dynamic/DDQN_network.pt b/models/exp_0.05_dynamic/DDQN_network.pt deleted file mode 100644 index 4e276b321dcf0adec24da1a620c22355b3d14be2..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_dynamic/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_dynamic/DQN with Fixed Q Targets_network.pt b/models/exp_0.05_dynamic/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index d9f5644ddb38054b9e3125fd82b8fa3021c50691..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_dynamic/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.05_dynamic/DQN_network.pt b/models/exp_0.05_dynamic/DQN_network.pt deleted file mode 100644 index fd641301cb0f440999d7da19f8daedadf963b64f..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_dynamic/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_dynamic/Dueling DDQN_network.pt b/models/exp_0.05_dynamic/Dueling DDQN_network.pt deleted file mode 100644 index 7a7ca55d2c7ad74654d0e0e14e6a041444fdae61..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_dynamic/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_random/DDQN with Prioritised Replay_network.pt b/models/exp_0.05_random/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 0ac643430123bdf9b50c3b0012fbdfc7104bc17d..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_random/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git 
a/models/exp_0.05_random/DDQN_network.pt b/models/exp_0.05_random/DDQN_network.pt deleted file mode 100644 index bd54b168c15800c22d1f983e58660ba5876af9f1..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_random/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_random/DQN with Fixed Q Targets_network.pt b/models/exp_0.05_random/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 43a0ad624fe71128413ef205b1b783d2c480385d..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_random/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.05_random/DQN_network.pt b/models/exp_0.05_random/DQN_network.pt deleted file mode 100644 index 4572f6064cb8b2455acf81644adfeceb98911084..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_random/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_random/Dueling DDQN_network.pt b/models/exp_0.05_random/Dueling DDQN_network.pt deleted file mode 100644 index a6efbdb53aa98433377cc8a2a46fdff057907e62..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_random/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_sweeping/DDQN with Prioritised Replay_network.pt b/models/exp_0.05_sweeping/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 570dfe0c23323991218be4f1aeafab13e5caaa20..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_sweeping/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.05_sweeping/DDQN_network.pt b/models/exp_0.05_sweeping/DDQN_network.pt deleted file mode 100644 index f6676582b228686347464fc7f48caf85971e9c83..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_sweeping/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_sweeping/DQN with Fixed Q Targets_network.pt b/models/exp_0.05_sweeping/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index e5297e7f477564d287a768021b0d457605c7308a..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_sweeping/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.05_sweeping/DQN_network.pt b/models/exp_0.05_sweeping/DQN_network.pt deleted file mode 100644 index 3c91c9658acf63b92a1613c011355bdab204d825..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_sweeping/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.05_sweeping/Dueling DDQN_network.pt b/models/exp_0.05_sweeping/Dueling DDQN_network.pt deleted file mode 100644 index 4b9d6711aab8f88d892c4bf068f9d3478f8dc936..0000000000000000000000000000000000000000 Binary files a/models/exp_0.05_sweeping/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_constant/DDQN with Prioritised Replay_network.pt b/models/exp_0.0_constant/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 763d4e95f79d4004d8c7530a6f0a73d712fea102..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_constant/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.0_constant/DDQN_network.pt b/models/exp_0.0_constant/DDQN_network.pt deleted file mode 100644 index ce1bfa294a0531386cde63554e0d1f844f0b75f4..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_constant/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_constant/DQN with Fixed Q Targets_network.pt b/models/exp_0.0_constant/DQN with Fixed Q 
Targets_network.pt deleted file mode 100644 index 968b2c53218bb76d44f4f88b52374533799fb545..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_constant/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.0_constant/DQN_network.pt b/models/exp_0.0_constant/DQN_network.pt deleted file mode 100644 index fe37cd04e9bccbfbdf1eb040d414b3e31690926b..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_constant/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_constant/Dueling DDQN_network.pt b/models/exp_0.0_constant/Dueling DDQN_network.pt deleted file mode 100644 index 2de0b1bec5f5d43da69a657cfb29de391e40f713..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_constant/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_dynamic/DDQN with Prioritised Replay_network.pt b/models/exp_0.0_dynamic/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 04737609e350203f5bd01eef6f0fe2c0076cc760..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_dynamic/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.0_dynamic/DDQN_network.pt b/models/exp_0.0_dynamic/DDQN_network.pt deleted file mode 100644 index 89aec13794edcea2cc893e54e9e5d81e908948fd..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_dynamic/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_dynamic/DQN with Fixed Q Targets_network.pt b/models/exp_0.0_dynamic/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 5f7b29869dd2bb1dc4a6e2d0dbb94b371c12bc5e..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_dynamic/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.0_dynamic/DQN_network.pt b/models/exp_0.0_dynamic/DQN_network.pt deleted file mode 100644 index af61a14b96dbd4b24c26cddf0eb89c784cfaa9c4..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_dynamic/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_dynamic/Dueling DDQN_network.pt b/models/exp_0.0_dynamic/Dueling DDQN_network.pt deleted file mode 100644 index af5df4010ae89a8b7f027d831fc034cc82d6e1b7..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_dynamic/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_random/DDQN with Prioritised Replay_network.pt b/models/exp_0.0_random/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index c40c9a105602d98cb24e51685cbf30eccb5c6702..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_random/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.0_random/DDQN_network.pt b/models/exp_0.0_random/DDQN_network.pt deleted file mode 100644 index bcef471246b7748f00c2ff1f6be58db67e19294d..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_random/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_random/DQN with Fixed Q Targets_network.pt b/models/exp_0.0_random/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 0f4ab2944e305360774be745f6c6e9f939ae9cc2..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_random/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.0_random/DQN_network.pt b/models/exp_0.0_random/DQN_network.pt deleted file mode 100644 index c703a27cfabd1e8e4bb1c0f77149ac4c99aa5ef0..0000000000000000000000000000000000000000 
Binary files a/models/exp_0.0_random/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_random/Dueling DDQN_network.pt b/models/exp_0.0_random/Dueling DDQN_network.pt deleted file mode 100644 index c0487b5ec8b2eb3de9dbf98fe1ed86547cc4c892..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_random/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_sweeping/DDQN with Prioritised Replay_network.pt b/models/exp_0.0_sweeping/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 9b0252b5e0515cff87fdf9b00d3107fea517a47c..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_sweeping/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.0_sweeping/DDQN_network.pt b/models/exp_0.0_sweeping/DDQN_network.pt deleted file mode 100644 index b25a001888e05d367c583191511e53bf979c80ae..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_sweeping/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_sweeping/DQN with Fixed Q Targets_network.pt b/models/exp_0.0_sweeping/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 71ab1bc42bd3cf2a55b2ec60d9536f8560ee202b..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_sweeping/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.0_sweeping/DQN_network.pt b/models/exp_0.0_sweeping/DQN_network.pt deleted file mode 100644 index 5129875a03ff95def3a01e5d278ecda7034c388a..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_sweeping/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.0_sweeping/Dueling DDQN_network.pt b/models/exp_0.0_sweeping/Dueling DDQN_network.pt deleted file mode 100644 index 35835ff7b2ed88d4a0ac0b2cc3768cf4826302ed..0000000000000000000000000000000000000000 Binary files a/models/exp_0.0_sweeping/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_constant/DDQN with Prioritised Replay_network.pt b/models/exp_0.15_constant/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index e9683f7b09fca2320f2ff66ec9fde1f786ae5cac..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_constant/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.15_constant/DDQN_network.pt b/models/exp_0.15_constant/DDQN_network.pt deleted file mode 100644 index ed393e9ff6982d5d34a0102d05c4141fe239c649..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_constant/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_constant/DQN with Fixed Q Targets_network.pt b/models/exp_0.15_constant/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 250b35c1e300ed44b25e876b3fd8bddc209954a2..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_constant/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.15_constant/DQN_network.pt b/models/exp_0.15_constant/DQN_network.pt deleted file mode 100644 index a07171da0acd5373a23987356a678d9207de4d40..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_constant/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_constant/Dueling DDQN_network.pt b/models/exp_0.15_constant/Dueling DDQN_network.pt deleted file mode 100644 index 4fbec003d58989bb562b7c0b74a876f8a668ea25..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_constant/Dueling DDQN_network.pt and /dev/null differ diff 
--git a/models/exp_0.15_dynamic/DDQN with Prioritised Replay_network.pt b/models/exp_0.15_dynamic/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index d728047388aa8679b36f247982d5877205152e85..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_dynamic/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.15_dynamic/DDQN_network.pt b/models/exp_0.15_dynamic/DDQN_network.pt deleted file mode 100644 index e98582684d695dfbd64939f8146d2ee635945e8c..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_dynamic/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_dynamic/DQN with Fixed Q Targets_network.pt b/models/exp_0.15_dynamic/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 93d5909ec3db118e145c137119d67ad18d1fa941..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_dynamic/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.15_dynamic/DQN_network.pt b/models/exp_0.15_dynamic/DQN_network.pt deleted file mode 100644 index 0c42e5516fb2fb31e505e11f983b64f603479b14..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_dynamic/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_dynamic/Dueling DDQN_network.pt b/models/exp_0.15_dynamic/Dueling DDQN_network.pt deleted file mode 100644 index 639516a932ca8d2b57542557f52ba71ae284b56c..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_dynamic/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_random/DDQN with Prioritised Replay_network.pt b/models/exp_0.15_random/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index af399b7ca246b2f0f3c8f3b6a3e4f3b53816cbe6..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_random/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.15_random/DDQN_network.pt b/models/exp_0.15_random/DDQN_network.pt deleted file mode 100644 index 7fc467b32cc772578fcc5c24f4e1c9ad099fe1c3..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_random/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_random/DQN with Fixed Q Targets_network.pt b/models/exp_0.15_random/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index dd4efb06d4f4c7d4a4eea3bf3645b2b22ac43368..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_random/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.15_random/DQN_network.pt b/models/exp_0.15_random/DQN_network.pt deleted file mode 100644 index ea6c206e33a87db6a3084640a9ca98815342d02b..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_random/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_random/Dueling DDQN_network.pt b/models/exp_0.15_random/Dueling DDQN_network.pt deleted file mode 100644 index aa955af86f1f2e5f252caf574d9765851a0d189e..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_random/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_sweeping/DDQN with Prioritised Replay_network.pt b/models/exp_0.15_sweeping/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 26bc2349f91939049c12e0061e04be5d3cd8b8bc..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_sweeping/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.15_sweeping/DDQN_network.pt 
b/models/exp_0.15_sweeping/DDQN_network.pt deleted file mode 100644 index 0906d0863a876079986e202eae90fdd9d2ebc97d..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_sweeping/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_sweeping/DQN with Fixed Q Targets_network.pt b/models/exp_0.15_sweeping/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index ae5a0b7aa5b5255ae6857dbc25c045139cd332c9..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_sweeping/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.15_sweeping/DQN_network.pt b/models/exp_0.15_sweeping/DQN_network.pt deleted file mode 100644 index 97f551e043609030538617adb940b4767d2fa6a8..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_sweeping/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.15_sweeping/Dueling DDQN_network.pt b/models/exp_0.15_sweeping/Dueling DDQN_network.pt deleted file mode 100644 index e2399da1ee4ff8b6b2a0f70589c5ac7a94e6a666..0000000000000000000000000000000000000000 Binary files a/models/exp_0.15_sweeping/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_constant/DDQN with Prioritised Replay_network.pt b/models/exp_0.1_constant/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index f9faac06101c67cfa0f9546f8ba8cfdfc31c0013..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_constant/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.1_constant/DDQN_network.pt b/models/exp_0.1_constant/DDQN_network.pt deleted file mode 100644 index 82a632fe6c7993b30fbb64e6db54e97f8a87b748..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_constant/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_constant/DQN with Fixed Q Targets_network.pt b/models/exp_0.1_constant/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 3c9befb524b2ff1247a0c7db2a4ce704978d6e6b..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_constant/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.1_constant/DQN_network.pt b/models/exp_0.1_constant/DQN_network.pt deleted file mode 100644 index f401ca15697c3cee46534d2545b68a372a7ecc0c..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_constant/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_constant/Dueling DDQN_network.pt b/models/exp_0.1_constant/Dueling DDQN_network.pt deleted file mode 100644 index 04556260ad707135145e8ddbd4cfc4c5c59f0297..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_constant/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_dynamic/DDQN with Prioritised Replay_network.pt b/models/exp_0.1_dynamic/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index fb7177088340e1f71d632d4aae6564c6f44656bd..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_dynamic/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.1_dynamic/DDQN_network.pt b/models/exp_0.1_dynamic/DDQN_network.pt deleted file mode 100644 index 600f530aa254f7a6bfc4ee1205fdf057462c5fbb..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_dynamic/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_dynamic/DQN with Fixed Q Targets_network.pt b/models/exp_0.1_dynamic/DQN with Fixed Q Targets_network.pt deleted file mode 100644 
index fc1773e7975e95c05b161a46b1342abf74f14397..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_dynamic/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.1_dynamic/DQN_network.pt b/models/exp_0.1_dynamic/DQN_network.pt deleted file mode 100644 index ad20a5d5dd07f85518990c3a7bde531f2eaee2a9..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_dynamic/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_dynamic/Dueling DDQN_network.pt b/models/exp_0.1_dynamic/Dueling DDQN_network.pt deleted file mode 100644 index 7c98c01250b69017788ea6143d2742a376a0ab61..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_dynamic/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_random/DDQN with Prioritised Replay_network.pt b/models/exp_0.1_random/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 0d9d90a143c1f9b5ae8690da22380d7ca1443630..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_random/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.1_random/DDQN_network.pt b/models/exp_0.1_random/DDQN_network.pt deleted file mode 100644 index 079c6aa62c62a8bdcec9a421b1fdf0e3776397c4..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_random/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_random/DQN with Fixed Q Targets_network.pt b/models/exp_0.1_random/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 270eb746817abc3575cfe0b665de46f08841ba8e..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_random/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.1_random/DQN_network.pt b/models/exp_0.1_random/DQN_network.pt deleted file mode 100644 index f207d0f6bb3267afab7b9789487186679985a7c8..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_random/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_random/Dueling DDQN_network.pt b/models/exp_0.1_random/Dueling DDQN_network.pt deleted file mode 100644 index 241fe3f255225c8a730dd916d4f62468edff66d5..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_random/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_sweeping/DDQN with Prioritised Replay_network.pt b/models/exp_0.1_sweeping/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 8ccaeb9d661e1af5f639bca61ad280b9c6445917..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_sweeping/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.1_sweeping/DDQN_network.pt b/models/exp_0.1_sweeping/DDQN_network.pt deleted file mode 100644 index 23f8592ababca7bab5ce68f4667a84473c5c48c5..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_sweeping/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_sweeping/DQN with Fixed Q Targets_network.pt b/models/exp_0.1_sweeping/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 43fdcf2d14bc9f50c7b01ac8297da8482c886174..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_sweeping/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.1_sweeping/DQN_network.pt b/models/exp_0.1_sweeping/DQN_network.pt deleted file mode 100644 index 462a9908cef65a44217972c6d92cad37e82ced79..0000000000000000000000000000000000000000 Binary files 
a/models/exp_0.1_sweeping/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.1_sweeping/Dueling DDQN_network.pt b/models/exp_0.1_sweeping/Dueling DDQN_network.pt deleted file mode 100644 index 8d45e531baca28264525fc5d8fca4e19e6537ecb..0000000000000000000000000000000000000000 Binary files a/models/exp_0.1_sweeping/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_constant/DDQN with Prioritised Replay_network.pt b/models/exp_0.2_constant/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 7c7905af3526a4de444c544a614958c554fa2fd9..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_constant/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.2_constant/DDQN_network.pt b/models/exp_0.2_constant/DDQN_network.pt deleted file mode 100644 index 05c6efae8c18adbc9c5461845ff2f7dc1b449ccb..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_constant/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_constant/DQN with Fixed Q Targets_network.pt b/models/exp_0.2_constant/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 7b86b588645b0e862e046602ef296517cc996ffa..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_constant/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.2_constant/DQN_network.pt b/models/exp_0.2_constant/DQN_network.pt deleted file mode 100644 index 8bfb6cb176f489b6de0869ad95a64c6a4779b385..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_constant/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_constant/Dueling DDQN_network.pt b/models/exp_0.2_constant/Dueling DDQN_network.pt deleted file mode 100644 index acfd3227b3348d9dd061af44a5d0627d00b3fdb7..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_constant/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_dynamic/DDQN with Prioritised Replay_network.pt b/models/exp_0.2_dynamic/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 8bb0f557a69717fcd7c7d45f7de86aae09d2276f..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_dynamic/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.2_dynamic/DDQN_network.pt b/models/exp_0.2_dynamic/DDQN_network.pt deleted file mode 100644 index ad8c97bf839345a527ed170ab765f083cae38235..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_dynamic/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_dynamic/DQN with Fixed Q Targets_network.pt b/models/exp_0.2_dynamic/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 974afee2fe1c02c5ecb5c866e06d668432b08307..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_dynamic/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.2_dynamic/DQN_network.pt b/models/exp_0.2_dynamic/DQN_network.pt deleted file mode 100644 index 87b1096646e03259be42ea054a5ca1c5f5161888..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_dynamic/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_dynamic/Dueling DDQN_network.pt b/models/exp_0.2_dynamic/Dueling DDQN_network.pt deleted file mode 100644 index 36ae72887a65339760505fefecc2425ee370c187..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_dynamic/Dueling DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_sweeping/DDQN 
with Prioritised Replay_network.pt b/models/exp_0.2_sweeping/DDQN with Prioritised Replay_network.pt deleted file mode 100644 index 5efbed791fccf46818a1e172f0fa2f0e8b4fcd77..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_sweeping/DDQN with Prioritised Replay_network.pt and /dev/null differ diff --git a/models/exp_0.2_sweeping/DDQN_network.pt b/models/exp_0.2_sweeping/DDQN_network.pt deleted file mode 100644 index c11f1809da20126e9f76c859b0b07c5319c73dcf..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_sweeping/DDQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_sweeping/DQN with Fixed Q Targets_network.pt b/models/exp_0.2_sweeping/DQN with Fixed Q Targets_network.pt deleted file mode 100644 index 1ad8152fe972d41cd1566189a861dc10bdeab74f..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_sweeping/DQN with Fixed Q Targets_network.pt and /dev/null differ diff --git a/models/exp_0.2_sweeping/DQN_network.pt b/models/exp_0.2_sweeping/DQN_network.pt deleted file mode 100644 index b80f836ab15caabca99840496286690e0428ff4e..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_sweeping/DQN_network.pt and /dev/null differ diff --git a/models/exp_0.2_sweeping/Dueling DDQN_network.pt b/models/exp_0.2_sweeping/Dueling DDQN_network.pt deleted file mode 100644 index ab163897b318a3bd7eb82253eca63a07c5de6064..0000000000000000000000000000000000000000 Binary files a/models/exp_0.2_sweeping/Dueling DDQN_network.pt and /dev/null differ diff --git a/results/Anti_Jam.py b/results/Anti_Jam.py deleted file mode 100644 index 438ab0210caa0c1768fd7f2678f128d64fc5ebfd..0000000000000000000000000000000000000000 --- a/results/Anti_Jam.py +++ /dev/null @@ -1,174 +0,0 @@ -import os -import sys -from os.path import dirname, abspath -from pathlib import Path - -sys.path.append(dirname(dirname(abspath(__file__)))) - -import warnings - -warnings.filterwarnings("ignore", category=UserWarning) - -import numpy as np -from utilities.data_structures.Config import Config -from agents.DQN_agents.DQN import DQN -from agents.DQN_agents.DQN_With_Fixed_Q_Targets import DQN_With_Fixed_Q_Targets -from agents.DQN_agents.DDQN import DDQN -from agents.DQN_agents.DDQN_With_Prioritised_Experience_Replay import DDQN_With_Prioritised_Experience_Replay -from agents.DQN_agents.Dueling_DDQN import Dueling_DDQN -from agents.Trainer import Trainer -from environments.RF_spectrum import RfEnvironment - -config = Config() -config.seed = 1 -# Setup environment -jammers = ['constant'] -jammer = 'dynamic' if len(jammers) > 1 else jammers[0] -env_settings = sys.argv -jammer_distance = 20 # cm -jamming_power = 10 # dBm -all_channels = [list(range(5180, 5340, 20))] -all_channels = [freq for band in range(len(all_channels)) for freq in all_channels[band]] -length = 32 # int(sample_rate * sensing_window) -stride = 1 -csc = 0.2 -env = RfEnvironment(jammers, jammer_distance, jamming_power, csc, all_channels, length, stride) -ob_space = env.features -ac_space = env.action_space -a_size = len(all_channels) -s_size = env.observation_size -config.environment = env - -config.base_dir = os.path.abspath('') -config.models_dir = f'{config.base_dir}/models/exp_{csc}_{jammer}' -if not os.path.exists(config.models_dir): - os.makedirs(config.models_dir) -config.file_to_save_data_results = f"{config.base_dir}/results/data_and_graphs/Anti_Jam_training_{csc}_{jammer}.pkl" -config.file_to_save_results_graph = 
f"{config.base_dir}/results/data_and_graphs/Anti_Jam_training_{csc}_{jammer}.png" - -config.num_episodes_to_run = 1 -config.runs_per_agent = 3 -config.show_solution_score = False -config.visualise_individual_results = False -config.visualise_overall_agent_results = True -config.standard_deviation_results = 1.0 -config.use_GPU = False -config.overwrite_existing_results_file = True -config.randomise_random_seed = True -config.save_model = True -config.training = True - -config.hyperparameters = { - "DQN_Agents": { - "learning_rate": 0.001, - "batch_size": 64, - "buffer_size": 100000, - "epsilon": 1.0, - "epsilon_decay_rate_denominator": 1, - "discount_rate": 0.99, - "tau": 0.01, - "alpha_prioritised_replay": 0.6, - "beta_prioritised_replay": 0.1, - "incremental_td_error": 1e-8, - "update_every_n_steps": 1, - "linear_hidden_units": [256, 256], - "final_layer_activation": "None", - "batch_norm": False, - "gradient_clipping_norm": 0.7, - "learning_iterations": 1, - "clip_rewards": False, - "HER_sample_proportion": 0.8, - "y_range": (-1, 14) - }, - "Stochastic_Policy_Search_Agents": { - "policy_network_type": "Linear", - "noise_scale_start": 1e-2, - "noise_scale_min": 1e-3, - "noise_scale_max": 2.0, - "noise_scale_growth_factor": 2.0, - "stochastic_action_decision": False, - "num_policies": 10, - "episodes_per_policy": 1, - "num_policies_to_keep": 5, - "clip_rewards": False - }, - "Policy_Gradient_Agents": { - "learning_rate": 0.05, - "linear_hidden_units": [256, 256], - "final_layer_activation": "SOFTMAX", - "learning_iterations_per_round": 5, - "discount_rate": 0.99, - "batch_norm": False, - "clip_epsilon": 0.1, - "episodes_per_learning_round": 4, - "normalise_rewards": True, - "gradient_clipping_norm": 7.0, - "mu": 0.0, # only required for continuous action games - "theta": 0.0, # only required for continuous action games - "sigma": 0.0, # only required for continuous action games - "epsilon_decay_rate_denominator": 1.0, - "clip_rewards": False - }, - - "Actor_Critic_Agents": { - - "learning_rate": 0.005, - "linear_hidden_units": [256, 256], - "final_layer_activation": ["SOFTMAX", None], - "gradient_clipping_norm": 5.0, - "discount_rate": 0.99, - "epsilon_decay_rate_denominator": 1.0, - "normalise_rewards": True, - "exploration_worker_difference": 2.0, - "clip_rewards": False, - - "Actor": { - "learning_rate": 0.0003, - "linear_hidden_units": [256, 256], - "final_layer_activation": "Softmax", - "batch_norm": False, - "tau": 0.005, - "gradient_clipping_norm": 5, - "initialiser": "Xavier" - }, - - "Critic": { - "learning_rate": 0.0003, - "linear_hidden_units": [256, 256], - "final_layer_activation": None, - "batch_norm": False, - "buffer_size": 1000000, - "tau": 0.005, - "gradient_clipping_norm": 5, - "initialiser": "Xavier" - }, - - "min_steps_before_learning": 400, - "batch_size": 64, - "discount_rate": 0.99, - "mu": 0.0, # for O-H noise - "theta": 0.15, # for O-H noise - "sigma": 0.25, # for O-H noise - "action_noise_std": 0.2, # for TD3 - "action_noise_clipping_range": 0.5, # for TD3 - "update_every_n_steps": 1, - "learning_updates_per_learning_session": 1, - "automatically_tune_entropy_hyperparameter": True, - "entropy_term_weight": None, - "add_extra_noise": False, - "do_evaluation_iterations": True - } -} - -if __name__ == "__main__": - # Training - AGENTS = [DQN, DQN_With_Fixed_Q_Targets, DDQN, Dueling_DDQN, DDQN_With_Prioritised_Experience_Replay] - trainer = Trainer(config, AGENTS) - trainer.run_games_for_agents() - # Testing - config.training = False - config.runs_per_agent = 1 - 
config.file_to_save_data_results = f"{config.base_dir}/results/data_and_graphs/Anti_Jam_testing_{csc}_{jammer}.pkl" - config.file_to_save_results_graph = f"{config.base_dir}/results/data_and_graphs/Anti_Jam_testing_{csc}_{jammer}.png" - trainer = Trainer(config, AGENTS) - trainer.run_games_for_agents() diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_constant.pkl b/results/data_and_graphs/Anti_Jam_testing_0.05_constant.pkl deleted file mode 100644 index 60a9d60c252d25e930adba41111616a3c2c9313c..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_constant.png b/results/data_and_graphs/Anti_Jam_testing_0.05_constant.png deleted file mode 100644 index 56c43131f4763909a8982eb4b80892a2116ec6b8..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_dynamic.pkl b/results/data_and_graphs/Anti_Jam_testing_0.05_dynamic.pkl deleted file mode 100644 index 3a8ea4fa3b6ddb8476873e9c169723e6b526e43d..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_dynamic.png b/results/data_and_graphs/Anti_Jam_testing_0.05_dynamic.png deleted file mode 100644 index e92227b6d56aacc21ce0fdf08191fbd48c38bd9d..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_random.pkl b/results/data_and_graphs/Anti_Jam_testing_0.05_random.pkl deleted file mode 100644 index ed547e5cdef53b411246c26fe8ce6d7bb956ac42..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_random.png b/results/data_and_graphs/Anti_Jam_testing_0.05_random.png deleted file mode 100644 index 2eb54d92ce03f49ef6a7a5a030ac8666297dc2d3..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_sweeping.pkl b/results/data_and_graphs/Anti_Jam_testing_0.05_sweeping.pkl deleted file mode 100644 index 5514fbcab76662e66bec17d81251458d475bb7b8..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.05_sweeping.png b/results/data_and_graphs/Anti_Jam_testing_0.05_sweeping.png deleted file mode 100644 index 7e945858153a7b4b13ea88647dcc11d56d2e21d2..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.05_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_constant.pkl b/results/data_and_graphs/Anti_Jam_testing_0.0_constant.pkl deleted file mode 100644 index cebb7e58330258611379541d73bf05e1dbc0d48a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_constant.png b/results/data_and_graphs/Anti_Jam_testing_0.0_constant.png deleted file mode 100644 index 
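A note on the deleted results/Anti_Jam.py above: its action space comes from an 8-channel grid in the 5 GHz band, list(range(5180, 5340, 20)), and a comprehension that flattens a list of bands into one channel list. A quick check of that arithmetic is below; the second band is hypothetical, added only to show what the flattening does.

# The deleted Anti_Jam.py defines one band and flattens it into the channel list.
all_channels = [list(range(5180, 5340, 20))]
flat = [freq for band in range(len(all_channels)) for freq in all_channels[band]]
print(flat)        # [5180, 5200, 5220, 5240, 5260, 5280, 5300, 5320]
print(len(flat))   # 8 channels -> a_size = 8 discrete actions, one per 20 MHz channel

# With a hypothetical second band, the same comprehension simply concatenates them:
two_bands = [list(range(5180, 5340, 20)), list(range(5500, 5560, 20))]
flat2 = [freq for band in range(len(two_bands)) for freq in two_bands[band]]
print(flat2)       # [..., 5320, 5500, 5520, 5540]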
fa42193f135cc33a74af4b4f9caa632e83ddf843..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_dynamic.pkl b/results/data_and_graphs/Anti_Jam_testing_0.0_dynamic.pkl deleted file mode 100644 index cfea9c89122ebb23a002ab7b9c05f2c57b5fe269..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_dynamic.png b/results/data_and_graphs/Anti_Jam_testing_0.0_dynamic.png deleted file mode 100644 index 1234539a5fc5d8b0bcdd4db4cfdf0bb92fc4930b..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_random.pkl b/results/data_and_graphs/Anti_Jam_testing_0.0_random.pkl deleted file mode 100644 index ee120ac41f349074fb29aa9207c574c28297694b..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_random.png b/results/data_and_graphs/Anti_Jam_testing_0.0_random.png deleted file mode 100644 index 97b123005533c87733381e104413836eab6d041a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_sweeping.pkl b/results/data_and_graphs/Anti_Jam_testing_0.0_sweeping.pkl deleted file mode 100644 index 7687d9e9a8c6538e5238727e38a002f3ab847eef..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.0_sweeping.png b/results/data_and_graphs/Anti_Jam_testing_0.0_sweeping.png deleted file mode 100644 index a23c193a2580dd8b86b4bb88f318ef511c21b775..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.0_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_constant.pkl b/results/data_and_graphs/Anti_Jam_testing_0.15_constant.pkl deleted file mode 100644 index 9eb220e2f749789abf6efa9eda4a77b8fa8fc8f5..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_constant.png b/results/data_and_graphs/Anti_Jam_testing_0.15_constant.png deleted file mode 100644 index 3d81100120846766bf654dc32f5c2736aa618498..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_dynamic.pkl b/results/data_and_graphs/Anti_Jam_testing_0.15_dynamic.pkl deleted file mode 100644 index 526b25022356678fdfdb67b3269b0408b79bc8aa..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_dynamic.png b/results/data_and_graphs/Anti_Jam_testing_0.15_dynamic.png deleted file mode 100644 index 7fd0217cfbd98c6a720c090e725c2d5b413e908b..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_dynamic.png and /dev/null differ 
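The deleted model checkpoints and result files in this diff follow the layout set up by Anti_Jam.py: checkpoints under models/exp_{csc}_{jammer}, and training/testing curves under results/data_and_graphs/Anti_Jam_{training|testing}_{csc}_{jammer}.pkl/.png. The sketch below reproduces those paths for one run; the csc values and jammer types are read off the deleted paths and the grid may not be exhaustive.

# Reconstructing the experiment file layout used by the deleted Anti_Jam.py.
csc_values = [0.0, 0.05, 0.1, 0.15, 0.2]
jammers = ["constant", "dynamic", "random", "sweeping"]
agents = ["DQN", "DQN with Fixed Q Targets", "DDQN",
          "DDQN with Prioritised Replay", "Dueling DDQN"]

def run_paths(base_dir, csc, jammer):
    models_dir = f"{base_dir}/models/exp_{csc}_{jammer}"
    return {
        "models": [f"{models_dir}/{agent}_network.pt" for agent in agents],
        "training": f"{base_dir}/results/data_and_graphs/Anti_Jam_training_{csc}_{jammer}.pkl",
        "testing": f"{base_dir}/results/data_and_graphs/Anti_Jam_testing_{csc}_{jammer}.pkl",
    }

print(run_paths(".", 0.2, "constant")["training"])
# ./results/data_and_graphs/Anti_Jam_training_0.2_constant.pkl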
diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_random.pkl b/results/data_and_graphs/Anti_Jam_testing_0.15_random.pkl deleted file mode 100644 index e29e779bd826fee2563d2b7a1f919f488ed16da7..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_random.png b/results/data_and_graphs/Anti_Jam_testing_0.15_random.png deleted file mode 100644 index e8c90009830ba17a4d9dd42f361dbb673a10983a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_sweeping.pkl b/results/data_and_graphs/Anti_Jam_testing_0.15_sweeping.pkl deleted file mode 100644 index 040bb77d4ee8e48ed02e4910acedbfdf415ca74f..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.15_sweeping.png b/results/data_and_graphs/Anti_Jam_testing_0.15_sweeping.png deleted file mode 100644 index f15438023c3b5a68beb33e083df24272146a033c..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.15_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_constant.pkl b/results/data_and_graphs/Anti_Jam_testing_0.1_constant.pkl deleted file mode 100644 index d31aafa6869d78ed27ceb1065ae069844334cfa1..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_constant.png b/results/data_and_graphs/Anti_Jam_testing_0.1_constant.png deleted file mode 100644 index 3f33c46f0efdd3cd41a9ba9c0a5fc2cd00268955..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_dynamic.pkl b/results/data_and_graphs/Anti_Jam_testing_0.1_dynamic.pkl deleted file mode 100644 index 06606c82d2114c31a7fddfe3174abd4f1d6031a1..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_dynamic.png b/results/data_and_graphs/Anti_Jam_testing_0.1_dynamic.png deleted file mode 100644 index c68f1e9fa8e19dffba28759c98f8c67263960aa9..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_random.pkl b/results/data_and_graphs/Anti_Jam_testing_0.1_random.pkl deleted file mode 100644 index 2ad8e047c097a3c3116d3f05519475470862bf3f..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_random.png b/results/data_and_graphs/Anti_Jam_testing_0.1_random.png deleted file mode 100644 index bde88e476ce47c01f00c4fe8e8bb96a76d372071..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_sweeping.pkl b/results/data_and_graphs/Anti_Jam_testing_0.1_sweeping.pkl deleted file mode 100644 index 
243d63f4bd8edeea85ccc8cf6d4e4e19d25386dd..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.1_sweeping.png b/results/data_and_graphs/Anti_Jam_testing_0.1_sweeping.png deleted file mode 100644 index e3f71751f5a0a9adc8511bec698f6beb83a7fa87..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.1_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.2_constant.pkl b/results/data_and_graphs/Anti_Jam_testing_0.2_constant.pkl deleted file mode 100644 index 654f62658830462ea63922a20d47b1bdf831a43a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.2_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.2_constant.png b/results/data_and_graphs/Anti_Jam_testing_0.2_constant.png deleted file mode 100644 index 09d5f1fcb1857dd3583e2046a10d0a54d63cfc1e..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.2_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.2_dynamic.pkl b/results/data_and_graphs/Anti_Jam_testing_0.2_dynamic.pkl deleted file mode 100644 index cd54c8f57e91ee85e6707a3d079720d36683ea2d..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.2_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.2_dynamic.png b/results/data_and_graphs/Anti_Jam_testing_0.2_dynamic.png deleted file mode 100644 index 40005264be04b71f4d4c8734275ee68c03e2152f..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.2_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.2_sweeping.pkl b/results/data_and_graphs/Anti_Jam_testing_0.2_sweeping.pkl deleted file mode 100644 index 5513ddec932047e7f1bf3558636f16dbd6efab1a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.2_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_testing_0.2_sweeping.png b/results/data_and_graphs/Anti_Jam_testing_0.2_sweeping.png deleted file mode 100644 index cd412523320a8195717b30b60ecacfc25cbae7dd..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_testing_0.2_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_constant.pkl b/results/data_and_graphs/Anti_Jam_training_0.05_constant.pkl deleted file mode 100644 index 21174e3f5998053f7049260749d69e1ecb7a3578..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_constant.png b/results/data_and_graphs/Anti_Jam_training_0.05_constant.png deleted file mode 100644 index 3fa5a68b4d8a95ad68c55cde5dd5b9c3dc68f8b9..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_dynamic.pkl b/results/data_and_graphs/Anti_Jam_training_0.05_dynamic.pkl deleted file mode 100644 index 385ae40f33b81216528bdf945a5c21bc05900f1d..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_dynamic.pkl 
and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_dynamic.png b/results/data_and_graphs/Anti_Jam_training_0.05_dynamic.png deleted file mode 100644 index 5e7320eff7cc4feeee2559df0cba4927eda72d0d..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_random.pkl b/results/data_and_graphs/Anti_Jam_training_0.05_random.pkl deleted file mode 100644 index 58913607c2e38db3ea93dcac348e77f885a6e79f..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_random.png b/results/data_and_graphs/Anti_Jam_training_0.05_random.png deleted file mode 100644 index 587d503e8c07a571c93551d949a0b7c0ea9369e8..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_sweeping.pkl b/results/data_and_graphs/Anti_Jam_training_0.05_sweeping.pkl deleted file mode 100644 index 7365a4e8ef611e57d8af83e2885822356b652c8c..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.05_sweeping.png b/results/data_and_graphs/Anti_Jam_training_0.05_sweeping.png deleted file mode 100644 index 573a8315c3571c5422992c4a0eb2fcb134e27e75..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.05_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_constant.pkl b/results/data_and_graphs/Anti_Jam_training_0.0_constant.pkl deleted file mode 100644 index c6721186712e3b242de1a4b5e3aac1e74c5cab10..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_constant.png b/results/data_and_graphs/Anti_Jam_training_0.0_constant.png deleted file mode 100644 index 10e8efab9fb199ececec37149a2326438a93bb62..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_constant1.png b/results/data_and_graphs/Anti_Jam_training_0.0_constant1.png deleted file mode 100644 index 9cebadf79d32605478a5cf58fb1c5dd29a67b5e7..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_constant1.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_dynamic.pkl b/results/data_and_graphs/Anti_Jam_training_0.0_dynamic.pkl deleted file mode 100644 index 81bbe2cc2dd5bebaa4794d51cc0d601fcd63fdd3..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_dynamic.png b/results/data_and_graphs/Anti_Jam_training_0.0_dynamic.png deleted file mode 100644 index 4cb61dc93fa8b486d336f88381da9e53ce381949..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_dynamic1.png 
b/results/data_and_graphs/Anti_Jam_training_0.0_dynamic1.png deleted file mode 100644 index 95588375a9cbeae83f5fafc6a636c0948dcb31a2..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_dynamic1.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_random.pkl b/results/data_and_graphs/Anti_Jam_training_0.0_random.pkl deleted file mode 100644 index dfc4e150f1d292fb3701c3687f65740a9fed74a4..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_random.png b/results/data_and_graphs/Anti_Jam_training_0.0_random.png deleted file mode 100644 index 703a489a268fb2d925a04a6607327efc6006c9f8..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_random1.png b/results/data_and_graphs/Anti_Jam_training_0.0_random1.png deleted file mode 100644 index ee99abdb51857b5710390f7266d3693fbeef245a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_random1.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_sweeping.pkl b/results/data_and_graphs/Anti_Jam_training_0.0_sweeping.pkl deleted file mode 100644 index 685af5124dd53b04ffbcda1faafbc3bcd80c20cf..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_sweeping.png b/results/data_and_graphs/Anti_Jam_training_0.0_sweeping.png deleted file mode 100644 index 64a3ba7374f74cbca393448c07724aff053682df..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.0_sweeping1.png b/results/data_and_graphs/Anti_Jam_training_0.0_sweeping1.png deleted file mode 100644 index be51993c02aa47f09132a6e75c6a716fc73a57ba..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.0_sweeping1.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_constant.pkl b/results/data_and_graphs/Anti_Jam_training_0.15_constant.pkl deleted file mode 100644 index 72f7314dba115a11364eb46a0f069e7bd188a635..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_constant.png b/results/data_and_graphs/Anti_Jam_training_0.15_constant.png deleted file mode 100644 index 1611b1a5a59a5cda5d7bc6ca8954f27d71f50cb5..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_dynamic.pkl b/results/data_and_graphs/Anti_Jam_training_0.15_dynamic.pkl deleted file mode 100644 index df3cf85531a1d1cbc90efa4bedce7325a31d3c61..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_dynamic.png b/results/data_and_graphs/Anti_Jam_training_0.15_dynamic.png deleted file mode 100644 index 
d18e405bbe2d0dc15b2631e8363c26917f7cb0a8..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_random.pkl b/results/data_and_graphs/Anti_Jam_training_0.15_random.pkl deleted file mode 100644 index 98ec476995021a7bd6d84c2d8e2c16b2e2d86e83..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_random.png b/results/data_and_graphs/Anti_Jam_training_0.15_random.png deleted file mode 100644 index 743743bacc5c3ba4d531f39a795f4f18726781ff..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_sweeping.pkl b/results/data_and_graphs/Anti_Jam_training_0.15_sweeping.pkl deleted file mode 100644 index 0d591ae9f9a5ab916364284769e7e23da77e8b2a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.15_sweeping.png b/results/data_and_graphs/Anti_Jam_training_0.15_sweeping.png deleted file mode 100644 index c0b6dc28bcc9cc3d879dbd3b7b3d32a992e74d4d..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.15_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_constant.pkl b/results/data_and_graphs/Anti_Jam_training_0.1_constant.pkl deleted file mode 100644 index 8e8391c52dff83095f4a99fe11a97f137421b7a7..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_constant.png b/results/data_and_graphs/Anti_Jam_training_0.1_constant.png deleted file mode 100644 index 902e546061ec25ca53fcb38115e6ae9e88b4d658..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_dynamic.pkl b/results/data_and_graphs/Anti_Jam_training_0.1_dynamic.pkl deleted file mode 100644 index edfd7500b04aea1a52fabbce5af08bf0140c0c0a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_dynamic.png b/results/data_and_graphs/Anti_Jam_training_0.1_dynamic.png deleted file mode 100644 index 593a0139280d838136fc0ba3a8af07ee978ae581..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_random.pkl b/results/data_and_graphs/Anti_Jam_training_0.1_random.pkl deleted file mode 100644 index ccf9e12645deeb871a0173a9d3dc2d697473bb75..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_random.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_random.png b/results/data_and_graphs/Anti_Jam_training_0.1_random.png deleted file mode 100644 index 829b92de13cb9839481e62230edfd947abce4c32..0000000000000000000000000000000000000000 Binary files 
a/results/data_and_graphs/Anti_Jam_training_0.1_random.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_sweeping.pkl b/results/data_and_graphs/Anti_Jam_training_0.1_sweeping.pkl deleted file mode 100644 index 4714d3a0ec006733b5f4a32719bb8c5ac0bd0499..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.1_sweeping.png b/results/data_and_graphs/Anti_Jam_training_0.1_sweeping.png deleted file mode 100644 index 6e3dfdded5f841e89c4ca58f621f1f164f862a84..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.1_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.2_constant.pkl b/results/data_and_graphs/Anti_Jam_training_0.2_constant.pkl deleted file mode 100644 index 479e0d843428e84162a5b4d8e4563ac791bc640f..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.2_constant.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.2_constant.png b/results/data_and_graphs/Anti_Jam_training_0.2_constant.png deleted file mode 100644 index f69888a2be8c2ed8df8a6873388200fde4fab00c..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.2_constant.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.2_dynamic.pkl b/results/data_and_graphs/Anti_Jam_training_0.2_dynamic.pkl deleted file mode 100644 index 3a0a3b4dfbb070ebe4c6d70ff2fffaca73508b70..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.2_dynamic.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.2_dynamic.png b/results/data_and_graphs/Anti_Jam_training_0.2_dynamic.png deleted file mode 100644 index 50c4f8a74e29a955578b931ee8f4619d57f0b8cd..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.2_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.2_sweeping.pkl b/results/data_and_graphs/Anti_Jam_training_0.2_sweeping.pkl deleted file mode 100644 index f2949d62c51540283d94e6fafcb0edebcbcd83b4..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.2_sweeping.pkl and /dev/null differ diff --git a/results/data_and_graphs/Anti_Jam_training_0.2_sweeping.png b/results/data_and_graphs/Anti_Jam_training_0.2_sweeping.png deleted file mode 100644 index d5ee58f11c5f347377b5f3fcf5b5b8162c58fbd1..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/Anti_Jam_training_0.2_sweeping.png and /dev/null differ diff --git a/results/data_and_graphs/cst_dynamic.pdf b/results/data_and_graphs/cst_dynamic.pdf deleted file mode 100644 index 0e400d1271b4c7f0bc70c4d0fa7b8b23f75ab176..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/cst_dynamic.pdf and /dev/null differ diff --git a/results/data_and_graphs/cst_dynamic.png b/results/data_and_graphs/cst_dynamic.png deleted file mode 100644 index 5df12c9648f17c808b25b9bce89b7f1388d14381..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/cst_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/plots.py b/results/data_and_graphs/plots.py deleted file mode 100644 index 
7a4bf611520af4a435304eac13f1b990c6664340..0000000000000000000000000000000000000000 --- a/results/data_and_graphs/plots.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# -# SPDX-License-Identifier: GPL-3.0 -# -# GNU Radio Python Flow Graph -# Title: Not titled yet -# Author: Abubakar Sani Ali -# GNU Radio version: 3.8.1.0 - -################################################################################### -# Importing Libraries -################################################################################### -import json -import matplotlib.pyplot as plt -import numpy as np -import seaborn as sns -import pickle -import pandas as pd - -sns.set_palette('colorblind') - -# jammers = ['constant', 'sweeping', 'random', 'dynamic'] -jammers = ['dynamic'] -agents = ['DQN', 'DQN with Fixed Q Targets', 'DDQN', 'Dueling DDQN', 'DDQN with Prioritised Replay'] -network = 'FNN' -history = 1 -cscs = [0.0, 0.05, 0.1, 0.15] -max_env_steps = 100 -Episodes = 100 -tot_iterations = max_env_steps * Episodes - -# experiment 1: Convergence time -filename = f'Anti_Jam_training_{cscs[0]}_{jammers[-1]}.pkl' -file = open(filename, 'rb') -object_file = pickle.load(file) -file.close() -n_runs = 3 -meanConvergenceTime = [] -stdConvergenceTime = [] -for agent in agents: - convergenceTime = [] - for run in range(n_runs): - agentName = object_file[f'{agent}'] - time = agentName[run][4] - convergenceTime.append(time) - meanConvergenceTime.append(np.array(convergenceTime).mean()) - stdConvergenceTime.append(np.array(convergenceTime).std()) -print(f'The convergence times are:') -print(meanConvergenceTime) -print(stdConvergenceTime) - -# experiment 2: Inference time -# meanInferenceTime = [] -# stdInferenceTime = [] -# for agent in agents: -# inferenceTime = [] -# for csc in cscs: -# filename = f'Anti_Jam_testing_{csc}_{jammer}.pkl' -# file = open(filename, 'rb') -# object_file = pickle.load(file) -# file.close() -# agentName = object_file[f'{agent}'] -# time = agentName[0][4] -# inferenceTime.append(time) -# meanInferenceTime.append(np.array(inferenceTime).mean()) -# stdInferenceTime.append(np.array(inferenceTime).std()) -# print(f'The inference times are:') -# print(np.array(meanInferenceTime)/tot_iterations) -# print(np.array(stdInferenceTime)/tot_iterations) -# print(f'The inference speeds are:') -# print(tot_iterations/np.array(meanInferenceTime)) -# print(tot_iterations/np.array(stdInferenceTime)) - -# experiment 3 plots: rewards -for csc in cscs: - rolling_rewards = np.empty((len(agents), n_runs, Episodes)) - filename = f'Anti_Jam_training_{csc}_{jammers[0]}.pkl' - file = open(filename, 'rb') - object_file = pickle.load(file) - file.close() - for agent_idx in range(len(agents)): - agent = agents[agent_idx] - for run in range(n_runs): - agentName = object_file[f'{agent}'] - rollingReward = agentName[run][1] - rolling_rewards[agent_idx][run] = rollingReward - - # Compute the mean and standard deviation of the rolling rewards - mean_rewards = np.mean(rolling_rewards, axis=1) - std_rewards = np.std(rolling_rewards, axis=1) - - # Plot the mean rolling rewards and the shaded standard deviation area - plotName = f'rolling_reward_{csc}_{jammers[0]}.pdf' - fig, ax = plt.subplots() - fig.set_figwidth(6) - fig.set_figheight(5) - for agent_idx in range(len(agents)): - ax.plot(mean_rewards[agent_idx], label=f'{agents[agent_idx]}') - ax.fill_between(range(Episodes), mean_rewards[agent_idx] - std_rewards[agent_idx], - mean_rewards[agent_idx] + std_rewards[agent_idx], alpha=0.3) - - 
ax.set_xlabel('Episode') - ax.set_ylabel('Rolling Average Reward') - - # Updated legend position - ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2) - - # Adjust the bottom margin to create more space for the legend - plt.subplots_adjust(bottom=0.25) - - plt.savefig(plotName, bbox_inches='tight') - plt.show() - -# experiment 4 plots: Throughput -for jammer in jammers: - throughput = [] - for csc in cscs: - filename = f'Anti_Jam_testing_{csc}_{jammer}.pkl' - file = open(filename, 'rb') - object_file = pickle.load(file) - file.close() - agentsThroughputs = [] - for agent in agents: - agentName = object_file[f'{agent}'] - episodeThroughputs = agentName[0][1] - meanEpisodeThroughput = np.array(episodeThroughputs).mean() - agentsThroughputs.append(meanEpisodeThroughput) - throughput.append(agentsThroughputs) - normalizedThroughput = np.transpose(np.array(throughput) / Episodes) - X_axis = np.arange(len(cscs)) - plotName = f'throughput_{jammer}.pdf' - fig, ax = plt.subplots() - fig.set_figwidth(10) - fig.set_figheight(4) - plt.bar(X_axis - 0.3, normalizedThroughput[0], 0.15, label=agents[0]) - plt.bar(X_axis - 0.15, normalizedThroughput[1], 0.15, label=agents[1]) - plt.bar(X_axis + 0, normalizedThroughput[2], 0.15, label=agents[2]) - plt.bar(X_axis + 0.15, normalizedThroughput[3], 0.15, label=agents[3]) - plt.bar(X_axis + 0.3, normalizedThroughput[4], 0.15, label=agents[4]) - - plt.ylim((0.6, 1)) - plt.xticks(X_axis, cscs) - plt.xlabel('Channel switching cost (CSC)') - plt.ylabel('Normalized Throughput') - - # Updated legend position - plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(agents)) - - # Adjust the bottom margin to create more space for the legend - plt.subplots_adjust(bottom=0.25) - - plt.savefig(plotName, bbox_inches='tight') - plt.show() - -# experiment 5 plots: Channel Switching Times -for jammer in jammers: - cstime = [] - for csc in cscs: - filename = f'Anti_Jam_testing_{csc}_{jammer}.pkl' - file = open(filename, 'rb') - object_file = pickle.load(file) - file.close() - agentsCstimes = [] - for agent in agents: - agentName = object_file[f'{agent}'] - episodeCstimes = agentName[0][-1] - meanEpisodeCstime = np.array(episodeCstimes).mean() - agentsCstimes.append(meanEpisodeCstime) - cstime.append(agentsCstimes) - normalizedCstime = np.transpose(np.array(cstime) / Episodes) - X_axis = np.arange(len(cscs)) - plotName = f'cst_{jammer}.pdf' - fig, ax = plt.subplots() - fig.set_figwidth(10) - fig.set_figheight(4) - plt.bar(X_axis - 0.3, normalizedCstime[0], 0.15, label=agents[0]) - plt.bar(X_axis - 0.15, normalizedCstime[1], 0.15, label=agents[1]) - plt.bar(X_axis + 0, normalizedCstime[2], 0.15, label=agents[2]) - plt.bar(X_axis + 0.15, normalizedCstime[3], 0.15, label=agents[3]) - plt.bar(X_axis + 0.3, normalizedCstime[4], 0.15, label=agents[4]) - - plt.ylim((0, 1)) - plt.xticks(X_axis, cscs) - plt.xlabel('Channel switching cost (CSC)') - plt.ylabel('Normalized Channel Switiching Frequency') - - # Updated legend position - plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(agents)) - - # Adjust the bottom margin to create more space for the legend - plt.subplots_adjust(bottom=0.25) - - plt.savefig(plotName, bbox_inches='tight') - plt.show() diff --git a/results/data_and_graphs/rolling_reward_0.05_dynamic.pdf b/results/data_and_graphs/rolling_reward_0.05_dynamic.pdf deleted file mode 100644 index d05f14e8d32167885d34558568feae7775c6ac95..0000000000000000000000000000000000000000 Binary files 
a/results/data_and_graphs/rolling_reward_0.05_dynamic.pdf and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.05_dynamic.png b/results/data_and_graphs/rolling_reward_0.05_dynamic.png deleted file mode 100644 index 82b41788210ac534edfcc97615d617a99096273b..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.05_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.0_dynamic.pdf b/results/data_and_graphs/rolling_reward_0.0_dynamic.pdf deleted file mode 100644 index 8097038bc3e9852136c20d02b10583a27938d402..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.0_dynamic.pdf and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.0_dynamic.png b/results/data_and_graphs/rolling_reward_0.0_dynamic.png deleted file mode 100644 index 73683c457792b119baef586e3380319fb291f1d3..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.0_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.15_dynamic.pdf b/results/data_and_graphs/rolling_reward_0.15_dynamic.pdf deleted file mode 100644 index faaafc3a5d90d706b8fcd93769a6daab93078016..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.15_dynamic.pdf and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.15_dynamic.png b/results/data_and_graphs/rolling_reward_0.15_dynamic.png deleted file mode 100644 index d347fa573cd6b3e4aab5ac630bc24d1a91c3428c..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.15_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.1_dynamic.pdf b/results/data_and_graphs/rolling_reward_0.1_dynamic.pdf deleted file mode 100644 index 147078b10e00c7685826222434e616a1c5f14f51..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.1_dynamic.pdf and /dev/null differ diff --git a/results/data_and_graphs/rolling_reward_0.1_dynamic.png b/results/data_and_graphs/rolling_reward_0.1_dynamic.png deleted file mode 100644 index e74ba88763ff04e71bd5718568820ca26ed2d13a..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/rolling_reward_0.1_dynamic.png and /dev/null differ diff --git a/results/data_and_graphs/throughput_dynamic.pdf b/results/data_and_graphs/throughput_dynamic.pdf deleted file mode 100644 index 606e02fd5ba396adedcd990c03191ec43f1d7f9e..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/throughput_dynamic.pdf and /dev/null differ diff --git a/results/data_and_graphs/throughput_dynamic.png b/results/data_and_graphs/throughput_dynamic.png deleted file mode 100644 index 975a2e6d58f2d1c1a6b72546a4daa63c566768ca..0000000000000000000000000000000000000000 Binary files a/results/data_and_graphs/throughput_dynamic.png and /dev/null differ diff --git a/utilities/Deepmind_RMS_Prop.py b/utilities/Deepmind_RMS_Prop.py deleted file mode 100644 index e143d12e40f60b314cfcedbab239c9e154e47b17..0000000000000000000000000000000000000000 --- a/utilities/Deepmind_RMS_Prop.py +++ /dev/null @@ -1,76 +0,0 @@ -import torch -from torch.optim import Optimizer - - -class DM_RMSprop(Optimizer): - """Implements the form of RMSProp used in DM 2015 Atari paper. 
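# --- Illustrative sketch, not part of the diff: the update rule that the
# --- DM_RMSprop.step() method below encodes, written out in plain NumPy so the
# --- centred-second-moment variant from the 2015 Atari paper is easier to read.
# --- The function name and default hyperparameters are assumptions chosen for clarity.
import numpy as np

def dm_rmsprop_update(param, grad, square_avg, mom_buffer,
                      lr=1e-2, alpha=0.99, momentum=0.95, eps=1e-8):
    mom_buffer = momentum * mom_buffer + (1 - momentum) * grad   # running mean of g
    square_avg = alpha * square_avg + (1 - alpha) * grad ** 2    # running mean of g^2
    denom = np.sqrt(square_avg - mom_buffer ** 2 + eps)          # centred RMS
    return param - lr * grad / denom, square_avg, mom_buffer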
- Inspired by https://github.com/spragunr/deep_q_rl/blob/master/deep_q_rl/updates.py""" - - def __init__(self, params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False): - if not 0.0 <= lr: - raise ValueError("Invalid learning rate: {}".format(lr)) - if not 0.0 <= eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) - if not 0.0 <= momentum: - raise ValueError("Invalid momentum value: {}".format(momentum)) - if not 0.0 <= weight_decay: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) - if not 0.0 <= alpha: - raise ValueError("Invalid alpha value: {}".format(alpha)) - - defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps, centered=centered, weight_decay=weight_decay) - super(DM_RMSprop, self).__init__(params, defaults) - - def __setstate__(self, state): - super(DM_RMSprop, self).__setstate__(state) - for group in self.param_groups: - group.setdefault('momentum', 0) - group.setdefault('centered', False) - - def step(self, closure=None): - """Performs a single optimization step. - - Arguments: - closure (callable, optional): A closure that reevaluates the model - and returns the loss. - """ - loss = None - if closure is not None: - loss = closure() - for group in self.param_groups: - momentum = group['momentum'] - sq_momentum = group['alpha'] - epsilon = group['eps'] - - for p in group['params']: - if p.grad is None: - continue - grad = p.grad.data - if grad.is_sparse: - raise RuntimeError('RMSprop does not support sparse gradients') - state = self.state[p] - - # State initialization - if len(state) == 0: - state['step'] = 0 - state['square_avg'] = torch.zeros_like(p.data) - if momentum > 0: - state['momentum_buffer'] = torch.zeros_like(p.data) - - mom_buffer = state['momentum_buffer'] - square_avg = state['square_avg'] - - - state['step'] += 1 - - mom_buffer.mul_(momentum) - mom_buffer.add_((1 - momentum) * grad) - - square_avg.mul_(sq_momentum).addcmul_(1 - sq_momentum, grad, grad) - - avg = (square_avg - mom_buffer**2 + epsilon).sqrt() - - p.data.addcdiv_(-group['lr'], grad, avg) - - return loss - diff --git a/utilities/LLM_image.png b/utilities/LLM_image.png deleted file mode 100644 index 2ad4e63b4fefba40619803cc9ebc191d681b8d05..0000000000000000000000000000000000000000 Binary files a/utilities/LLM_image.png and /dev/null differ diff --git a/utilities/Memory_Shaper.py b/utilities/Memory_Shaper.py deleted file mode 100644 index 54e11098a7786634d79e3ff1a82c1106c41762e4..0000000000000000000000000000000000000000 --- a/utilities/Memory_Shaper.py +++ /dev/null @@ -1,109 +0,0 @@ -# NOT FINISHED -from .data_structures.Action_Balanced_Replay_Buffer import Action_Balanced_Replay_Buffer -from .data_structures.Replay_Buffer import Replay_Buffer -import numpy as np -import random - -class Memory_Shaper(object): - """Takes in the experience of full episodes and reshapes it according to macro-actions you define. Then it provides - a replay buffer with this reshaped data to learn from""" - def __init__(self, buffer_size, batch_size, seed, new_reward_fn, action_balanced_replay_buffer=True): - self.reset() - self.buffer_size = buffer_size - self.batch_size = batch_size - self.seed = seed - self.new_reward_fn = new_reward_fn - self.action_balanced_replay_buffer = action_balanced_replay_buffer - - def put_adapted_experiences_in_a_replay_buffer(self, action_id_to_actions): - """Adds experiences to the replay buffer after re-imagining that the actions taken were macro-actions according to - action_rules as well as primitive actions. 
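# --- Illustrative sketch, an assumption rather than code from the original repo:
# --- one plausible shape for the action_id_to_actions argument used above, with
# --- integer action ids mapping to tuples of primitive actions (macro-actions are
# --- the tuples longer than one), plus the reward-reshaping callback signature
# --- that Memory_Shaper is constructed with.
example_action_id_to_actions = {
    0: (0,), 1: (1,), 2: (2,),   # primitive actions wrapped as length-1 tuples
    3: (0, 2), 4: (1, 1, 2),     # hypothetical macro-actions: primitive sequences
}

def example_new_reward_fn(summed_reward, macro_length):
    # Hypothetical shaping: pass the summed reward through unchanged; the real
    # scheme is whatever new_reward_fn the caller supplies to Memory_Shaper.
    return summed_reward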
- - NOTE that we want to put both primitive actions and macro-actions into replay buffer so that it can learn that - its better to do a macro-action rather than the same primitive actions (which we will enforce with reward penalty) - """ - - actions_to_action_id = {v: k for k, v in action_id_to_actions.items()} - - self.num_actions = len(action_id_to_actions) - - print(actions_to_action_id) - - for key in actions_to_action_id.keys(): - assert isinstance(key, tuple) - assert isinstance(actions_to_action_id[key], int) - - episodes = len(self.states) - for data_type in [self.states, self.next_states, self.rewards, self.actions, self.dones]: - assert len(data_type) == episodes - - max_action_length = self.calculate_max_action_length(actions_to_action_id) - - if self.action_balanced_replay_buffer: - print("Using action balanced replay buffer") - replay_buffer = Action_Balanced_Replay_Buffer(self.buffer_size, self.batch_size, self.seed, num_actions=self.num_actions) - else: - print("Using ordinary replay buffer") - replay_buffer = Replay_Buffer(self.buffer_size, self.batch_size, self.seed) - - for episode_ix in range(episodes): - self.add_adapted_experience_for_an_episode(episode_ix, actions_to_action_id, max_action_length, replay_buffer) - - return replay_buffer - - def calculate_max_action_length(self, actions_to_action_id): - """Calculates the max length of the provided macro-actions""" - max_length = 0 - for key in actions_to_action_id.keys(): - action_length = len(key) - if action_length > max_length: - max_length = action_length - return max_length - - - def add_adapted_experience_for_an_episode(self, episode_ix, action_rules, max_action_length, replay_buffer): - """Adds all the experiences we have been given to a replay buffer after adapting experiences that involved doing a - macro action""" - states = self.states[episode_ix] - next_states = self.next_states[episode_ix] - rewards = self.rewards[episode_ix] - actions = self.actions[episode_ix] - dones = self.dones[episode_ix] - - assert len(states) == len(next_states) == len(rewards) == len(dones) == len(actions), "{} {} {} {} {} = {}".format(len(states), len(next_states), len(rewards), len(dones), len(actions), actions) - steps = len(states) - for step in range(steps): - replay_buffer.add_experience(states[step], actions[step], rewards[step], next_states[step], dones[step]) - for action_length in range(2, max_action_length + 1): - if step < action_length - 1: continue - action_sequence = tuple(actions[step - action_length + 1 : step + 1]) - assert all([action in range(self.num_actions) for action in action_sequence]), "All actions should be primitive here" - if action_sequence in action_rules.keys(): - new_action = action_rules[action_sequence] - new_state = states[step - action_length + 1] - new_reward = np.sum(rewards[step - action_length + 1:step + 1]) - new_reward = self.new_reward_fn(new_reward, len(action_sequence)) - new_next_state = next_states[step] - new_dones = dones[step] - replay_buffer.add_experience(new_state, new_action, new_reward, new_next_state, new_dones) - - - def add_episode_experience(self, states, next_states, rewards, actions, dones): - """Adds in an episode of experience""" - self.states.append(states) - self.next_states.append(next_states) - self.rewards.append(rewards) - self.actions.append(actions) - self.dones.append(dones) - - def reset(self): - self.states = [] - self.next_states = [] - self.rewards = [] - self.actions = [] - self.dones = [] - - - - - diff --git a/utilities/OU_Noise.py 
b/utilities/OU_Noise.py deleted file mode 100644 index c5dde9fce3eef98fad8dcecd3d4b5baf24e4def7..0000000000000000000000000000000000000000 --- a/utilities/OU_Noise.py +++ /dev/null @@ -1,22 +0,0 @@ -import numpy as np -import random -import copy - -class OU_Noise(object): - """Ornstein-Uhlenbeck process.""" - def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.2): - self.mu = mu * np.ones(size) - self.theta = theta - self.sigma = sigma - self.seed = random.seed(seed) - self.reset() - - def reset(self): - """Reset the internal state (= noise) to mean (mu).""" - self.state = copy.copy(self.mu) - - def sample(self): - """Update internal state and return it as a noise sample.""" - dx = self.theta * (self.mu - self.state) + self.sigma * np.array([np.random.normal() for _ in range(len(self.state))]) - self.state += dx - return self.state \ No newline at end of file diff --git a/utilities/Parallel_Experience_Generator.py b/utilities/Parallel_Experience_Generator.py deleted file mode 100644 index e3ed74bc821197efb783eb80e207ce337963bf0a..0000000000000000000000000000000000000000 --- a/utilities/Parallel_Experience_Generator.py +++ /dev/null @@ -1,84 +0,0 @@ -import random -import torch -import sys -from contextlib import closing -# -# from pathos.multiprocessing import ProcessingPool as Pool - -from torch.multiprocessing import Pool -from random import randint - -from utilities.OU_Noise import OU_Noise -from utilities.Utility_Functions import create_actor_distribution - -class Parallel_Experience_Generator(object): - """ Plays n episode in parallel using a fixed agent. Only works for PPO or DDPG type agents at the moment, not Q-learning agents""" - def __init__(self, environment, policy, seed, hyperparameters, action_size, use_GPU=False, action_choice_output_columns=None): - self.use_GPU = use_GPU - self.environment = environment - self.action_types = "DISCRETE" if self.environment.action_space.dtype in [int, 'int64'] else "CONTINUOUS" - self.action_size = action_size - self.policy = policy - self.action_choice_output_columns = action_choice_output_columns - self.hyperparameters = hyperparameters - if self.action_types == "CONTINUOUS": self.noise = OU_Noise(self.action_size, seed, self.hyperparameters["mu"], - self.hyperparameters["theta"], self.hyperparameters["sigma"]) - - - def play_n_episodes(self, n, exploration_epsilon=None): - """Plays n episodes in parallel using the fixed policy and returns the data""" - self.exploration_epsilon = exploration_epsilon - with closing(Pool(processes=n)) as pool: - results = pool.map(self, range(n)) - pool.terminate() - states_for_all_episodes = [episode[0] for episode in results] - actions_for_all_episodes = [episode[1] for episode in results] - rewards_for_all_episodes = [episode[2] for episode in results] - return states_for_all_episodes, actions_for_all_episodes, rewards_for_all_episodes - - def __call__(self, n): - exploration = max(0.0, random.uniform(self.exploration_epsilon / 3.0, self.exploration_epsilon * 3.0)) - return self.play_1_episode(exploration) - - def play_1_episode(self, epsilon_exploration): - """Plays 1 episode using the fixed policy and returns the data""" - state = self.reset_game() - done = False - episode_states = [] - episode_actions = [] - episode_rewards = [] - while not done: - action = self.pick_action(self.policy, state, epsilon_exploration) - next_state, reward, done, _ = self.environment.step(action) - if self.hyperparameters["clip_rewards"]: reward = max(min(reward, 1.0), -1.0) - episode_states.append(state) - 
episode_actions.append(action) - episode_rewards.append(reward) - state = next_state - return episode_states, episode_actions, episode_rewards - - def reset_game(self): - """Resets the game environment so it is ready to play a new episode""" - seed = randint(0, sys.maxsize) - torch.manual_seed(seed) # Need to do this otherwise each worker generates same experience - state = self.environment.reset() - if self.action_types == "CONTINUOUS": self.noise.reset() - return state - - def pick_action(self, policy, state, epsilon_exploration=None): - """Picks an action using the policy""" - if self.action_types == "DISCRETE": - if random.random() <= epsilon_exploration: - action = random.randint(0, self.action_size - 1) - return action - - state = torch.from_numpy(state).float().unsqueeze(0) - actor_output = policy.forward(state) - if self.action_choice_output_columns is not None: - actor_output = actor_output[:, self.action_choice_output_columns] - action_distribution = create_actor_distribution(self.action_types, actor_output, self.action_size) - action = action_distribution.sample().cpu() - - if self.action_types == "CONTINUOUS": action += torch.Tensor(self.noise.sample()) - else: action = action.item() - return action \ No newline at end of file diff --git a/utilities/PyTorch-logo-2.jpg b/utilities/PyTorch-logo-2.jpg deleted file mode 100644 index 279075d3797046c3ab0259d8d8675cfcbd89a249..0000000000000000000000000000000000000000 Binary files a/utilities/PyTorch-logo-2.jpg and /dev/null differ diff --git a/utilities/RL_image.jpeg b/utilities/RL_image.jpeg deleted file mode 100644 index d93ac10f4fc2a4189cf8b29c568f8b7b916842d0..0000000000000000000000000000000000000000 Binary files a/utilities/RL_image.jpeg and /dev/null differ diff --git a/utilities/Tensorboard.py b/utilities/Tensorboard.py deleted file mode 100644 index 66002f34c759792112ce9a82d476c9b1391ce51a..0000000000000000000000000000000000000000 --- a/utilities/Tensorboard.py +++ /dev/null @@ -1,73 +0,0 @@ -# NOTE that this code is not mine and was taken from https://becominghuman.ai/logging-in-tensorboard-with-pytorch-or-any-other-library-c549163dee9e - - -import io -import numpy as np -from PIL import Image -import tensorflow as tf - -# run tensorboard --logdir="logs/" on command line to get up the tensorboard afterwards - -class Tensorboard: - def __init__(self, logdir): - self.writer = tf.summary.FileWriter(logdir) - - def close(self): - self.writer.close() - - def log_scalar(self, tag, value, global_step): - summary = tf.Summary() - summary.value.add(tag=tag, simple_value=value) - self.writer.add_summary(summary, global_step=global_step) - self.writer.flush() - - def log_histogram(self, tag, values, global_step, bins): - counts, bin_edges = np.histogram(values, bins=bins) - - hist = tf.HistogramProto() - hist.min = float(np.min(values)) - hist.max = float(np.max(values)) - hist.num = int(np.prod(values.shape)) - hist.sum = float(np.sum(values)) - hist.sum_squares = float(np.sum(values ** 2)) - - bin_edges = bin_edges[1:] - - for edge in bin_edges: - hist.bucket_limit.append(edge) - for c in counts: - hist.bucket.append(c) - - summary = tf.Summary() - summary.value.add(tag=tag, histo=hist) - self.writer.add_summary(summary, global_step=global_step) - self.writer.flush() - - def log_image(self, tag, img, global_step): - s = io.BytesIO() - Image.fromarray(img).save(s, format='png') - - img_summary = tf.Summary.Image(encoded_image_string=s.getvalue(), - height=img.shape[0], - width=img.shape[1]) - - summary = tf.Summary() - 
summary.value.add(tag=tag, image=img_summary) - self.writer.add_summary(summary, global_step=global_step) - self.writer.flush() - - def log_plot(self, tag, figure, global_step): - plot_buf = io.BytesIO() - figure.savefig(plot_buf, format='png') - plot_buf.seek(0) - img = Image.open(plot_buf) - img_ar = np.array(img) - - img_summary = tf.Summary.Image(encoded_image_string=plot_buf.getvalue(), - height=img_ar.shape[0], - width=img_ar.shape[1]) - - summary = tf.Summary() - summary.value.add(tag=tag, image=img_summary) - self.writer.add_summary(summary, global_step=global_step) - self.writer.flush() \ No newline at end of file diff --git a/utilities/Utility_Functions.py b/utilities/Utility_Functions.py deleted file mode 100644 index 0870df0fa868cce097c5c2560e6a44138aaf8f4a..0000000000000000000000000000000000000000 --- a/utilities/Utility_Functions.py +++ /dev/null @@ -1,122 +0,0 @@ -import math - -import numpy as np -from abc import ABCMeta -import torch -from nn_builder.pytorch.NN import NN -from torch.distributions import Categorical, normal, MultivariateNormal - -def abstract(cls): - return ABCMeta(cls.__name__, cls.__bases__, dict(cls.__dict__)) - -def save_score_results(file_path, results): - """Saves results as a numpy file at given path""" - np.save(file_path, results) - -def normalise_rewards(rewards): - """Normalises rewards to mean 0 and standard deviation 1""" - mean_reward = np.mean(rewards) - std_reward = np.std(rewards) - return (rewards - mean_reward) / (std_reward + 1e-8) #1e-8 added for stability - -def create_actor_distribution(action_types, actor_output, action_size): - """Creates a distribution that the actor can then use to randomly draw actions""" - if action_types == "DISCRETE": - assert actor_output.size()[1] == action_size, "Actor output the wrong size" - action_distribution = Categorical(actor_output) # this creates a distribution to sample from - else: - assert actor_output.size()[1] == action_size * 2, "Actor output the wrong size" - means = actor_output[:, :action_size].squeeze(0) - stds = actor_output[:, action_size:].squeeze(0) - if len(means.shape) == 2: means = means.squeeze(-1) - if len(stds.shape) == 2: stds = stds.squeeze(-1) - if len(stds.shape) > 1 or len(means.shape) > 1: - raise ValueError("Wrong mean and std shapes - {} -- {}".format(stds.shape, means.shape)) - action_distribution = normal.Normal(means.squeeze(0), torch.abs(stds)) - return action_distribution - -class SharedAdam(torch.optim.Adam): - """Creates an adam optimizer object that is shareable between processes. Useful for algorithms like A3C. Code - taken from https://github.com/ikostrikov/pytorch-a3c/blob/master/my_optim.py""" - def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False): - super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad) - for group in self.param_groups: - for p in group['params']: - state = self.state[p] - state['step'] = torch.zeros(1) - state['exp_avg'] = p.data.new().resize_as_(p.data).zero_() - state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_() - - def share_memory(self): - for group in self.param_groups: - for p in group['params']: - state = self.state[p] - state['step'].share_memory_() - state['exp_avg'].share_memory_() - state['exp_avg_sq'].share_memory_() - - def step(self, closure=None): - """Performs a single optimization step. - Arguments: - closure (callable, optional): A closure that reevaluates the model - and returns the loss. 
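# --- Illustrative usage sketch, an assumption rather than code from the original
# --- repo: how a process-shareable optimizer such as the SharedAdam class above is
# --- typically wired into A3C-style training, with one optimizer whose step counter
# --- and moment buffers live in shared memory, stepped by several worker processes
# --- against a shared model. The dummy loss and worker count are placeholders.
import torch
import torch.nn as nn
import torch.multiprocessing as mp

def worker(shared_model, shared_optimizer):
    # Each worker computes a (dummy) loss and applies gradients through the shared
    # optimizer, so the Adam moment estimates are common to all workers.
    loss = shared_model(torch.randn(8, 4)).pow(2).mean()
    shared_optimizer.zero_grad()
    loss.backward()
    shared_optimizer.step()

if __name__ == "__main__":
    model = nn.Linear(4, 2)
    model.share_memory()                      # parameters visible to child processes
    optimizer = SharedAdam(model.parameters(), lr=1e-3)
    optimizer.share_memory()                  # shares step/exp_avg/exp_avg_sq buffers
    workers = [mp.Process(target=worker, args=(model, optimizer)) for _ in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()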
- """ - loss = None - if closure is not None: - loss = closure() - for group in self.param_groups: - for p in group['params']: - if p.grad is None: - continue - grad = p.grad.data - amsgrad = group['amsgrad'] - state = self.state[p] - exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] - if amsgrad: - max_exp_avg_sq = state['max_exp_avg_sq'] - beta1, beta2 = group['betas'] - state['step'] += 1 - if group['weight_decay'] != 0: - grad = grad.add(group['weight_decay'], p.data) - # Decay the first and second moment running average coefficient - exp_avg.mul_(beta1).add_(1 - beta1, grad) - exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) - if amsgrad: - # Maintains the maximum of all 2nd moment running avg. till now - torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) - # Use the max. for normalizing running avg. of gradient - denom = max_exp_avg_sq.sqrt().add_(group['eps']) - else: - denom = exp_avg_sq.sqrt().add_(group['eps']) - bias_correction1 = 1 - beta1 ** state['step'].item() - bias_correction2 = 1 - beta2 ** state['step'].item() - step_size = group['lr'] * math.sqrt( - bias_correction2) / bias_correction1 - - p.data.addcdiv_(-step_size, exp_avg, denom) - return loss - -def flatten_action_id_to_actions(action_id_to_actions, global_action_id_to_primitive_action, num_primitive_actions): - """Converts the values in an action_id_to_actions dictionary back to the primitive actions they represent""" - flattened_action_id_to_actions = {} - for key in action_id_to_actions.keys(): - actions = action_id_to_actions[key] - raw_actions = backtrack_action_to_primitive_actions(actions, global_action_id_to_primitive_action, num_primitive_actions) - flattened_action_id_to_actions[key] = raw_actions - return flattened_action_id_to_actions - -def backtrack_action_to_primitive_actions(action_tuple, global_action_id_to_primitive_action, num_primitive_actions): - """Converts an action tuple back to the primitive actions it represents in a recursive way.""" - print("Recursing to backtrack on ", action_tuple) - primitive_actions = range(num_primitive_actions) - if all(action in primitive_actions for action in action_tuple): return action_tuple #base case - new_action_tuple = [] - for action in action_tuple: - if action in primitive_actions: new_action_tuple.append(action) - else: - converted_action = global_action_id_to_primitive_action[action] - print(new_action_tuple) - new_action_tuple.extend(converted_action) - print("Should have changed: ", new_action_tuple) - new_action_tuple = tuple(new_action_tuple) - return backtrack_action_to_primitive_actions(new_action_tuple) diff --git a/utilities/__pycache__/OU_Noise.cpython-310.pyc b/utilities/__pycache__/OU_Noise.cpython-310.pyc deleted file mode 100644 index 4c14e3652a198d70ba0c7395b4cddbcc45702799..0000000000000000000000000000000000000000 Binary files a/utilities/__pycache__/OU_Noise.cpython-310.pyc and /dev/null differ diff --git a/utilities/__pycache__/OU_Noise.cpython-39.pyc b/utilities/__pycache__/OU_Noise.cpython-39.pyc deleted file mode 100644 index 7189172f83e8120bd0b0424ba64a9d518f896d0f..0000000000000000000000000000000000000000 Binary files a/utilities/__pycache__/OU_Noise.cpython-39.pyc and /dev/null differ diff --git a/utilities/__pycache__/Parallel_Experience_Generator.cpython-310.pyc b/utilities/__pycache__/Parallel_Experience_Generator.cpython-310.pyc deleted file mode 100644 index fb66dcc0411dbb8d7bd2a14feb77a22b8767291c..0000000000000000000000000000000000000000 Binary files 
a/utilities/__pycache__/Parallel_Experience_Generator.cpython-310.pyc and /dev/null differ diff --git a/utilities/__pycache__/Parallel_Experience_Generator.cpython-39.pyc b/utilities/__pycache__/Parallel_Experience_Generator.cpython-39.pyc deleted file mode 100644 index 0469141fbb29254a889c1df1bc68c383ab3028ad..0000000000000000000000000000000000000000 Binary files a/utilities/__pycache__/Parallel_Experience_Generator.cpython-39.pyc and /dev/null differ diff --git a/utilities/__pycache__/Utility_Functions.cpython-310.pyc b/utilities/__pycache__/Utility_Functions.cpython-310.pyc deleted file mode 100644 index 5df83de365ff951896cc822f7e4ec32f0ae7fd52..0000000000000000000000000000000000000000 Binary files a/utilities/__pycache__/Utility_Functions.cpython-310.pyc and /dev/null differ diff --git a/utilities/__pycache__/Utility_Functions.cpython-39.pyc b/utilities/__pycache__/Utility_Functions.cpython-39.pyc deleted file mode 100644 index a2872ff2ae3d481583ec5541bd50df39faa08ee5..0000000000000000000000000000000000000000 Binary files a/utilities/__pycache__/Utility_Functions.cpython-39.pyc and /dev/null differ diff --git a/utilities/data_structures/Action_Balanced_Replay_Buffer.py b/utilities/data_structures/Action_Balanced_Replay_Buffer.py deleted file mode 100644 index f7f77fefbd8764a8d4b4d545241c85e4537d35fb..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Action_Balanced_Replay_Buffer.py +++ /dev/null @@ -1,104 +0,0 @@ -import random -from collections import namedtuple, deque -import torch -import numpy as np -from .Replay_Buffer import Replay_Buffer - -class Action_Balanced_Replay_Buffer(Replay_Buffer): - """Replay buffer that provides sample of experiences that have an equal number of each action being conducted""" - def __init__(self, buffer_size, batch_size, seed, num_actions): - self.num_actions = num_actions - self.buffer_size_per_memory = int(buffer_size / self.num_actions) - - print("NUM ACTIONS ", self.num_actions) - self.memories = {action: deque(maxlen=self.buffer_size_per_memory) for action in range(self.num_actions)} - self.batch_size = batch_size - self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) - self.seed = random.seed(seed) - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - def add_experience(self, states, actions, rewards, next_states, dones): - """Adds experience or list of experiences into the replay buffer""" - if type(dones) == list: - assert type(dones[0]) != list, "A done shouldn't be a list" - experiences = [self.experience(state, action, reward, next_state, done) - for state, action, reward, next_state, done in - zip(states, actions, rewards, next_states, dones)] - for experience in experiences: - action = experience.action - self.memories[action].append(experience) - else: - experience = self.experience(states, actions, rewards, next_states, dones) - self.memories[actions].append(experience) - - def pick_experiences(self, num_experiences=None): - """Picks the experiences that the sample function will return as a random sample of experiences. 
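# --- Illustrative sketch in plain Python (names are assumptions): the even split
# --- this buffer aims for when sampling, matching calculate_batch_sizes_per_action
# --- below: every action gets batch_size // num_actions samples and the remainder
# --- is handed to randomly chosen actions.
import random

def split_batch_evenly(batch_size, num_actions, rng=random):
    per_action = {a: batch_size // num_actions for a in range(num_actions)}
    for a in rng.sample(range(num_actions), batch_size % num_actions):
        per_action[a] += 1
    return per_action

# e.g. split_batch_evenly(10, 4) might return {0: 3, 1: 2, 2: 3, 3: 2}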
It works by picking - an equal number of experiences that used each action (as far as possible)""" - if num_experiences: batch_size = num_experiences - else: batch_size = self.batch_size - batch_per_action = self.calculate_batch_sizes_per_action(batch_size) - samples_split_by_action = self.sample_each_action_equally(batch_per_action) - combined_sample = [] - for key in samples_split_by_action.keys(): - combined_sample.extend(samples_split_by_action[key]) - return combined_sample - - def calculate_batch_sizes_per_action(self, batch_size): - """Calculates the batch size we need to randomly draw from each action to make sure there is equal coverage - per action and that the batch gets filled up""" - min_batch_per_action = int(batch_size / self.num_actions) - batch_per_action = {k: min_batch_per_action for k in range(self.num_actions)} - current_batch_size = np.sum([batch_per_action[k] for k in range(self.num_actions)]) - remainder = batch_size - current_batch_size - give_remainder_to = random.sample(range(self.num_actions), remainder) - for action in give_remainder_to: - batch_per_action[action] += 1 - return batch_per_action - - def sample_each_action_equally(self, batch_per_action): - """Samples a number of experiences (determined by batch_per_action) from the memory buffer for each action""" - samples = {} - for action in range(self.num_actions): - memory = self.memories[action] - batch_size_for_action = batch_per_action[action] - action_memory_size = len(memory) - assert action_memory_size > 0, "Need at least 1 experience for each action" - if action_memory_size >= batch_size_for_action: - samples[action] = random.sample(memory, batch_size_for_action) - else: - print("Memory size {} vs. required batch size {}".format(action_memory_size, batch_size_for_action)) - samples_for_action = [] - while len(samples_for_action) < batch_per_action[action]: - remainder = batch_per_action[action] - len(samples_for_action) - sampled_experiences = random.sample(memory, min(remainder, action_memory_size)) - samples_for_action.extend(sampled_experiences) - samples[action] = samples_for_action - return samples - - def __len__(self): - return np.sum([len(memory) for memory in self.memories.values()]) - - def sample_experiences_with_certain_actions(self, allowed_actions, num_all_actions, required_batch_size): - """Samples a number of experiences where the action conducted was in the list of required actions""" - assert isinstance(allowed_actions, list) - assert len(allowed_actions) > 0 - - num_new_actions = len(allowed_actions) - experiences_to_sample = int(required_batch_size * float(num_all_actions) / float(num_new_actions)) - experiences = self.sample(num_experiences=experiences_to_sample) - states, actions, rewards, next_states, dones = experiences - matching_indexes = np.argwhere((np.in1d(actions.numpy(), allowed_actions))) - assert matching_indexes.shape[1] == 1 - - matching_indexes = matching_indexes[:, 0] - - states = states[matching_indexes] - actions = actions[matching_indexes] - rewards = rewards[matching_indexes] - next_states = next_states[matching_indexes] - dones = dones[matching_indexes] - - assert abs(states.shape[0] - required_batch_size) <= 0.05*required_batch_size, "{} vs. 
{}".format(states.shape[0], required_batch_size) - - - return (states, actions, rewards, next_states, dones) diff --git a/utilities/data_structures/Config.py b/utilities/data_structures/Config.py deleted file mode 100644 index 8ddc01a3040a1fc62869abf65578badd00dd66fe..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Config.py +++ /dev/null @@ -1,22 +0,0 @@ -class Config(object): - """Object to hold the config requirements for an agent/game""" - def __init__(self): - self.seed = None - self.environment = None - self.requirements_to_solve_game = None - self.num_episodes_to_run = None - self.file_to_save_data_results = None - self.file_to_save_results_graph = None - self.runs_per_agent = None - self.visualise_overall_results = None - self.visualise_individual_results = None - self.hyperparameters = None - self.use_GPU = None - self.overwrite_existing_results_file = None - self.save_model = False - self.standard_deviation_results = 1.0 - self.randomise_random_seed = True - self.show_solution_score = False - self.debug_mode = False - - diff --git a/utilities/data_structures/Deque.py b/utilities/data_structures/Deque.py deleted file mode 100644 index 610d4d121511965afd1782659fc1aac22b9bf8cd..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Deque.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np -from utilities.data_structures.Node import Node - -class Deque(object): - """Generic deque object""" - def __init__(self, max_size, dimension_of_value_attribute): - - self.max_size = max_size - self.dimension_of_value_attribute = dimension_of_value_attribute - self.deque = self.initialise_deque() - self.deque_index_to_overwrite_next = 0 - self.reached_max_capacity = False - self.number_experiences_in_deque = 0 - - def initialise_deque(self): - """Initialises a queue of Nodes of length self.max_size""" - deque = np.array([Node(0, tuple([None for _ in range(self.dimension_of_value_attribute)])) for _ in range(self.max_size)]) - return deque - - def add_element_to_deque(self, new_key, new_value): - """Adds an element to the deque and then updates the index of the next element to be overwritten and also the - amount of elements in the deque""" - self.update_deque_node_key_and_value(self.deque_index_to_overwrite_next, new_key, new_value) - self.update_number_experiences_in_deque() - self.update_deque_index_to_overwrite_next() - - def update_deque_node_key_and_value(self, index, new_key, new_value): - self.update_deque_node_key(index, new_key) - self.update_deque_node_value(index, new_value) - - def update_deque_node_key(self, index, new_key): - self.deque[index].update_key(new_key) - - def update_deque_node_value(self, index, new_value): - self.deque[index].update_value(new_value) - - def update_deque_index_to_overwrite_next(self): - """Updates the deque index that we should write over next. 
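# --- Illustrative helper, an assumption rather than code from the original file:
# --- the ring-buffer advance this method performs, i.e. move to the next slot and
# --- wrap back to 0 once the end of the fixed-size deque is reached.
def next_write_index(current_index, max_size):
    return 0 if current_index >= max_size - 1 else current_index + 1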
When the buffer gets full we begin writing over - older experiences""" - if self.deque_index_to_overwrite_next < self.max_size - 1: - self.deque_index_to_overwrite_next += 1 - else: - self.reached_max_capacity = True - self.deque_index_to_overwrite_next = 0 - - def update_number_experiences_in_deque(self): - """Keeps track of how many experiences there are in the buffer""" - if not self.reached_max_capacity: - self.number_experiences_in_deque += 1 \ No newline at end of file diff --git a/utilities/data_structures/Max_Heap.py b/utilities/data_structures/Max_Heap.py deleted file mode 100644 index 150bea2fd6129a09d57192eaa76b1e70ee46d862..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Max_Heap.py +++ /dev/null @@ -1,63 +0,0 @@ -import numpy as np -from utilities.data_structures.Node import Node - -class Max_Heap(object): - """Generic max heap object""" - def __init__(self, max_size, dimension_of_value_attribute, default_key_to_use): - - self.max_size = max_size - self.dimension_of_value_attribute = dimension_of_value_attribute - self.default_key_to_use = default_key_to_use - self.heap = self.initialise_heap() - - def initialise_heap(self): - """Initialises a heap of Nodes of length self.max_size * 4 + 1""" - heap = np.array([Node(self.default_key_to_use, tuple([None for _ in range(self.dimension_of_value_attribute)])) for _ in range(self.max_size * 4 + 1)]) - - # We don't use the 0th element in a heap so we want it to have infinite value so it is never swapped with a lower node - heap[0] = Node(float("inf"), (None, None, None, None, None)) - return heap - - def update_element_and_reorganise_heap(self, heap_index_for_change, new_element): - self.update_heap_element(heap_index_for_change, new_element) - self.reorganise_heap(heap_index_for_change) - - def update_heap_element(self, heap_index, new_element): - self.heap[heap_index] = new_element - - def reorganise_heap(self, heap_index_changed): - """This reorganises the heap after a new value is added so as to keep the max value at the top of the heap which - is index position 1 in the array self.heap""" - - node_key = self.heap[heap_index_changed].key - parent_index = int(heap_index_changed / 2) - - if node_key > self.heap[parent_index].key: - self.swap_heap_elements(heap_index_changed, parent_index) - self.reorganise_heap(parent_index) - - else: - biggest_child_index = self.calculate_index_of_biggest_child(heap_index_changed) - if node_key < self.heap[biggest_child_index].key: - self.swap_heap_elements(heap_index_changed, biggest_child_index) - self.reorganise_heap(biggest_child_index) - - def swap_heap_elements(self, index1, index2): - """Swaps the position of two heap elements""" - self.heap[index1], self.heap[index2] = self.heap[index2], self.heap[index1] - - def calculate_index_of_biggest_child(self, heap_index_changed): - """Calculates the heap index of the node's child with the biggest td_error value""" - left_child = self.heap[int(heap_index_changed * 2)] - right_child = self.heap[int(heap_index_changed * 2) + 1] - - if left_child.key > right_child.key: - biggest_child_index = heap_index_changed * 2 - else: - biggest_child_index = heap_index_changed * 2 + 1 - - return biggest_child_index - - def give_max_key(self): - """Returns the maximum td error currently in the heap. 
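# --- Illustrative helper, an assumption rather than code from the original file:
# --- the index arithmetic Max_Heap relies on, a 1-indexed array where slot 0 holds
# --- an infinite-key sentinel, the parent of node i sits at i // 2 and its children
# --- at 2*i and 2*i + 1.
def heap_family(i):
    return {"parent": i // 2, "left_child": 2 * i, "right_child": 2 * i + 1}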
Because it is a max heap this is the top element of the heap""" - return self.heap[1].key diff --git a/utilities/data_structures/Node.py b/utilities/data_structures/Node.py deleted file mode 100644 index bcb12c5bd94b8ec55dd7ad1ec039d4e30992a0da..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Node.py +++ /dev/null @@ -1,18 +0,0 @@ -class Node(object): - """Generic Node class. Used in the implementation of a prioritised replay buffer""" - def __init__(self, key, value): - self.key = key - self.value = value - - def update_key_and_value(self, new_key, new_value): - self.update_key(new_key) - self.update_value(new_value) - - def update_key(self, new_key): - self.key = new_key - - def update_value(self, new_value): - self.value = new_value - - def __eq__(self, other): - return self.key == other.key and self.value == other.value \ No newline at end of file diff --git a/utilities/data_structures/Prioritised_Replay_Buffer.py b/utilities/data_structures/Prioritised_Replay_Buffer.py deleted file mode 100644 index 46c2b4d4c19f53b23be2675c48fbdccfd400e86d..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Prioritised_Replay_Buffer.py +++ /dev/null @@ -1,154 +0,0 @@ -import numpy as np -import torch -from utilities.data_structures.Deque import Deque -from utilities.data_structures.Max_Heap import Max_Heap - -class Prioritised_Replay_Buffer(Max_Heap, Deque): - """Data structure that maintains a deque, a heap and an array. The deque keeps track of which experiences are the oldest and so - tells us which ones to delete once the buffer starts getting full. The heap lets us quickly retrieve the experience - with the max td_value. And the array lets us do quick random samples with probabilities equal to the proportional td errors. - We also keep track of the sum of the td values using a simple variable. - - NOTE that this implementation is not optimal in terms of speed. At some point I will make improvements to it. 
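# --- Illustrative sketch in plain NumPy, an assumption rather than the buffer's
# --- actual code path: the proportional prioritisation and importance-sampling
# --- weights that Prioritised_Replay_Buffer maintains through its deque/heap
# --- bookkeeping (see https://arxiv.org/pdf/1511.05952.pdf). The alpha/beta
# --- defaults are typical paper values, not the project's hyperparameters.
import numpy as np

def prioritised_probabilities_and_weights(raw_td_errors, alpha=0.6, beta=0.4,
                                          incremental_td_error=1e-4):
    priorities = (np.abs(raw_td_errors) + incremental_td_error) ** alpha
    probabilities = priorities / priorities.sum()        # P(i): sampling probability
    n = len(raw_td_errors)
    weights = (1.0 / (n * probabilities)) ** beta        # importance-sampling weights
    return probabilities, weights / weights.max()        # normalised by the max weight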
- - """ - - def __init__(self, hyperparameters, seed=0): - Max_Heap.__init__(self, hyperparameters["buffer_size"], dimension_of_value_attribute=5, default_key_to_use=0) - Deque.__init__(self, hyperparameters["buffer_size"], dimension_of_value_attribute=5) - np.random.seed(seed) - - self.deques_td_errors = self.initialise_td_errors_array() - - self.heap_index_to_overwrite_next = 1 - self.number_experiences_in_deque = 0 - self.adapted_overall_sum_of_td_errors = 0 - - self.alpha = hyperparameters["alpha_prioritised_replay"] - self.beta = hyperparameters["beta_prioritised_replay"] - self.incremental_td_error = hyperparameters["incremental_td_error"] - self.batch_size = hyperparameters["batch_size"] - - self.heap_indexes_to_update_td_error_for = None - - self.indexes_in_node_value_tuple = { - "state": 0, - "action": 1, - "reward": 2, - "next_state": 3, - "done": 4 - } - # self.device = torch.device("cpu") - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - def initialise_td_errors_array(self): - """Initialises a deque of Nodes of length self.max_size""" - return np.zeros(self.max_size) - - def add_experience(self, raw_td_error, state, action, reward, next_state, done): - """Save an experience in the replay buffer""" - td_error = (abs(raw_td_error) + self.incremental_td_error) ** self.alpha - self.update_overall_sum(td_error, self.deque[self.deque_index_to_overwrite_next].key) - self.update_deque_and_deque_td_errors(td_error, state, action, reward, next_state, done) - self.update_heap_and_heap_index_to_overwrite() - self.update_number_experiences_in_deque() - self.update_deque_index_to_overwrite_next() - - def update_overall_sum(self, new_td_error, old_td_error): - """Updates the overall sum of td_values present in the buffer""" - self.adapted_overall_sum_of_td_errors += new_td_error - old_td_error - - def update_deque_and_deque_td_errors(self, td_error, state, action, reward, next_state, done): - """Updates the deque by overwriting the oldest experience with the experience provided""" - self.deques_td_errors[self.deque_index_to_overwrite_next] = td_error - self.add_element_to_deque(td_error, (state, action, reward, next_state, done)) - - def add_element_to_deque(self, new_key, new_value): - """Adds an element to the deque""" - self.update_deque_node_key_and_value(self.deque_index_to_overwrite_next, new_key, new_value) - - def update_heap_and_heap_index_to_overwrite(self): - """Updates the heap by rearranging it given the new experience that was just incorporated into it. If we haven't - reached max capacity then the new experience is added directly into the heap, otherwise a pointer on the heap has - changed to reflect the new experience so there's no need to add it in""" - if not self.reached_max_capacity: - self.update_heap_element(self.heap_index_to_overwrite_next, self.deque[self.deque_index_to_overwrite_next]) - self.deque[self.deque_index_to_overwrite_next].heap_index = self.heap_index_to_overwrite_next - self.update_heap_index_to_overwrite_next() - - heap_index_change = self.deque[self.deque_index_to_overwrite_next].heap_index - self.reorganise_heap(heap_index_change) - - def update_heap_index_to_overwrite_next(self): - """This updates the heap index to write over next. 
Once the buffer gets full we stop calling this function because - the nodes the heap points to start being changed directly rather than the pointers on the heap changing""" - self.heap_index_to_overwrite_next += 1 - - def swap_heap_elements(self, index1, index2): - """Swaps two position of two heap elements and then updates the heap_index stored in the two nodes. We have to override - this method from Max_Heap so that it also updates the heap_index variables""" - self.heap[index1], self.heap[index2] = self.heap[index2], self.heap[index1] - self.heap[index1].heap_index = index1 - self.heap[index2].heap_index = index2 - - def sample(self, rank_based=True): - """Randomly samples a batch from experiences giving a higher likelihood to experiences with a higher td error. It then - calculates an importance sampling weight for each sampled experience, you can read about this in the paper: - https://arxiv.org/pdf/1511.05952.pdf""" - experiences, deque_sample_indexes = self.pick_experiences_based_on_proportional_td_error() - states, actions, rewards, next_states, dones = self.separate_out_data_types(experiences) - self.deque_sample_indexes_to_update_td_error_for = deque_sample_indexes - importance_sampling_weights = self.calculate_importance_sampling_weights(experiences) - return (states, actions, rewards, next_states, dones), importance_sampling_weights - - def pick_experiences_based_on_proportional_td_error(self): - """Randomly picks a batch of experiences with probability equal to their proportional td_errors""" - probabilities = self.deques_td_errors / self.give_adapted_sum_of_td_errors() - deque_sample_indexes = np.random.choice(range(len(self.deques_td_errors)), size=self.batch_size, replace=False, p=probabilities) - experiences = self.deque[deque_sample_indexes] - return experiences, deque_sample_indexes - - def separate_out_data_types(self, experiences): - """Separates out experiences into their different parts and makes them tensors ready to be used in a pytorch model""" - states = torch.from_numpy(np.vstack([e.value[self.indexes_in_node_value_tuple["state"]] for e in experiences])).float().to(self.device) - actions = torch.from_numpy(np.vstack([e.value[self.indexes_in_node_value_tuple["action"]] for e in experiences])).float().to(self.device) - rewards = torch.from_numpy(np.vstack([e.value[self.indexes_in_node_value_tuple["reward"]] for e in experiences])).float().to(self.device) - next_states = torch.from_numpy(np.vstack([e.value[self.indexes_in_node_value_tuple["next_state"]] for e in experiences])).float().to( - self.device) - dones = torch.from_numpy(np.vstack([int(e.value[self.indexes_in_node_value_tuple["done"]]) for e in experiences])).float().to(self.device) - - return states, actions, rewards, next_states, dones - - def calculate_importance_sampling_weights(self, experiences): - """Calculates the importance sampling weight of each observation in the sample. 
The weight is proportional to the td_error of the observation, - see the paper here for more details: https://arxiv.org/pdf/1511.05952.pdf""" - td_errors = [experience.key for experience in experiences] - importance_sampling_weights = [((1.0 / self.number_experiences_in_deque) * (self.give_adapted_sum_of_td_errors() / td_error)) ** self.beta for td_error in td_errors] - sample_max_importance_weight = max(importance_sampling_weights) - importance_sampling_weights = [is_weight / sample_max_importance_weight for is_weight in importance_sampling_weights] - importance_sampling_weights = torch.tensor(importance_sampling_weights).float().to(self.device) - return importance_sampling_weights - - def update_td_errors(self, td_errors): - """Updates the td_errors for the provided heap indexes. The indexes should be the observations provided most - recently by the give_sample method""" - for raw_td_error, deque_index in zip(td_errors, self.deque_sample_indexes_to_update_td_error_for): - td_error = (abs(raw_td_error) + self.incremental_td_error) ** self.alpha - corresponding_heap_index = self.deque[deque_index].heap_index - self.update_overall_sum(td_error, self.heap[corresponding_heap_index].key) - self.heap[corresponding_heap_index].key = td_error - self.reorganise_heap(corresponding_heap_index) - self.deques_td_errors[deque_index] = td_error - - def give_max_td_error(self): - """Returns the maximum td error currently in the heap. Because it is a max heap this is the top element of the heap""" - return self.give_max_key() - - def give_adapted_sum_of_td_errors(self): - """Returns the sum of td errors of the experiences currently in the heap""" - return self.adapted_overall_sum_of_td_errors - - def __len__(self): - """Tells us how many experiences there are in the replay buffer. 
This number will never exceed self.max_size""" - return self.number_experiences_in_deque - diff --git a/utilities/data_structures/Replay_Buffer.py b/utilities/data_structures/Replay_Buffer.py deleted file mode 100644 index a1734a1c269c1abff8b6e518f3d04335a67ae1da..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Replay_Buffer.py +++ /dev/null @@ -1,57 +0,0 @@ -from collections import namedtuple, deque -import random -import torch -import numpy as np - -class Replay_Buffer(object): - """Replay buffer to store past experiences that the agent can then use for training data""" - - def __init__(self, buffer_size, batch_size, seed, device=None): - - self.memory = deque(maxlen=buffer_size) - self.batch_size = batch_size - self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"]) - self.seed = random.seed(seed) - if device: - self.device = torch.device(device) - else: - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - def add_experience(self, states, actions, rewards, next_states, dones): - """Adds experience(s) into the replay buffer""" - if type(dones) == list: - assert type(dones[0]) != list, "A done shouldn't be a list" - experiences = [self.experience(state, action, reward, next_state, done) - for state, action, reward, next_state, done in - zip(states, actions, rewards, next_states, dones)] - self.memory.extend(experiences) - else: - experience = self.experience(states, actions, rewards, next_states, dones) - self.memory.append(experience) - - def sample(self, num_experiences=None, separate_out_data_types=True): - """Draws a random sample of experience from the replay buffer""" - experiences = self.pick_experiences(num_experiences) - if separate_out_data_types: - states, actions, rewards, next_states, dones = self.separate_out_data_types(experiences) - return states, actions, rewards, next_states, dones - else: - return experiences - - def separate_out_data_types(self, experiences): - """Puts the sampled experience into the correct format for a PyTorch neural network""" - states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(self.device) - actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).float().to(self.device) - rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(self.device) - next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(self.device) - dones = torch.from_numpy(np.vstack([int(e.done) for e in experiences if e is not None])).float().to(self.device) - - return states, actions, rewards, next_states, dones - - def pick_experiences(self, num_experiences=None): - if num_experiences is not None: batch_size = num_experiences - else: batch_size = self.batch_size - return random.sample(self.memory, k=batch_size) - - def __len__(self): - return len(self.memory) diff --git a/utilities/data_structures/Tanh_Distribution.py b/utilities/data_structures/Tanh_Distribution.py deleted file mode 100644 index ca46055753b7b5a1f312e4a38b3c4dc39b1e54ff..0000000000000000000000000000000000000000 --- a/utilities/data_structures/Tanh_Distribution.py +++ /dev/null @@ -1,78 +0,0 @@ - -# NOTE that this is not my code. 
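For reference, the prioritisation arithmetic implemented by the Prioritised_Replay_Buffer removed above can be sketched standalone. This is an illustrative NumPy-only sketch with made-up numbers, not code from the repository; alpha, beta and incremental_td_error stand in for the hyperparameters alpha_prioritised_replay, beta_prioritised_replay and incremental_td_error read by the class, and the max-normalisation mirrors calculate_importance_sampling_weights.

import numpy as np

# Made-up raw TD errors for a tiny buffer of four experiences
raw_td_errors = np.array([0.5, 0.1, 2.0, 0.0])
alpha, beta, incremental_td_error = 0.6, 0.4, 1e-5  # stand-ins for the class hyperparameters

# Priority key per experience: (|td_error| + incremental_td_error) ** alpha, as in add_experience
priorities = (np.abs(raw_td_errors) + incremental_td_error) ** alpha

# Sampling probabilities proportional to the stored keys, as in pick_experiences_based_on_proportional_td_error
probabilities = priorities / priorities.sum()

# Importance-sampling weights ((1/N) * (sum_of_keys / key_i)) ** beta, normalised by the
# maximum weight, as in calculate_importance_sampling_weights
n = len(priorities)
is_weights = ((1.0 / n) * (priorities.sum() / priorities)) ** beta
is_weights = is_weights / is_weights.max()

print(probabilities)
print(is_weights)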
-# Taken from here: https://github.com/vitchyr/rlkit/blob/master/rlkit/torch/distributions.py - - -import torch -from torch.distributions import Distribution, Normal - - -class TanhNormal(Distribution): - """ - Represent distribution of X where - X ~ tanh(Z) - Z ~ N(mean, std) - Note: this is not very numerically stable. - """ - def __init__(self, normal_mean, normal_std, epsilon=1e-6): - """ - :param normal_mean: Mean of the normal distribution - :param normal_std: Std of the normal distribution - :param epsilon: Numerical stability epsilon when computing log-prob. - """ - self.normal_mean = normal_mean - self.normal_std = normal_std - self.normal = Normal(normal_mean, normal_std) - self.epsilon = epsilon - - def sample_n(self, n, return_pre_tanh_value=False): - z = self.normal.sample_n(n) - if return_pre_tanh_value: - return torch.tanh(z), z - else: - return torch.tanh(z) - - def log_prob(self, value, pre_tanh_value=None): - """ - :param value: some value, x - :param pre_tanh_value: arctanh(x) - :return: - """ - if pre_tanh_value is None: - pre_tanh_value = torch.log( - (1+value) / (1-value) - ) / 2 - return self.normal.log_prob(pre_tanh_value) - torch.log( - 1 - value * value + self.epsilon - ) - - def sample(self, return_pretanh_value=False): - """ - Gradients will and should *not* pass through this operation. - See https://github.com/pytorch/pytorch/issues/4620 for discussion. - """ - z = self.normal.sample().detach() - - if return_pretanh_value: - return torch.tanh(z), z - else: - return torch.tanh(z) - - def rsample(self, return_pretanh_value=False): - """ - Sampling in the reparameterization case. - """ - z = ( - self.normal_mean + - self.normal_std * - Normal( - torch.zeros(self.normal_mean.size()), - torch.ones(self.normal_std.size()) - ).sample() - ) - z.requires_grad_() - - if return_pretanh_value: - return torch.tanh(z), z - else: - return torch.tanh(z) \ No newline at end of file diff --git a/utilities/data_structures/__pycache__/Config.cpython-310.pyc b/utilities/data_structures/__pycache__/Config.cpython-310.pyc deleted file mode 100644 index bc53e7ffc659728b7a521c56db200ebc3ae14f70..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Config.cpython-310.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Config.cpython-38.pyc b/utilities/data_structures/__pycache__/Config.cpython-38.pyc deleted file mode 100644 index ca7e004ba86a424cbb7c8ae5d5447c021844b22c..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Config.cpython-38.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Config.cpython-39.pyc b/utilities/data_structures/__pycache__/Config.cpython-39.pyc deleted file mode 100644 index fb896ed3ad949baede4b1fdb555600a2b18b1a4e..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Config.cpython-39.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Deque.cpython-310.pyc b/utilities/data_structures/__pycache__/Deque.cpython-310.pyc deleted file mode 100644 index 49899d8dca31399c49964f80e86383e154e6cf84..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Deque.cpython-310.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Deque.cpython-39.pyc b/utilities/data_structures/__pycache__/Deque.cpython-39.pyc deleted file mode 100644 index 
146749dd0dd9ff979bcbab8432eea01c63ce5864..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Deque.cpython-39.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Max_Heap.cpython-310.pyc b/utilities/data_structures/__pycache__/Max_Heap.cpython-310.pyc deleted file mode 100644 index 09e6462f32a511cf6b802a714b774320d261252d..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Max_Heap.cpython-310.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Max_Heap.cpython-39.pyc b/utilities/data_structures/__pycache__/Max_Heap.cpython-39.pyc deleted file mode 100644 index 210cb1c39ddad9908bd4050f97f764e1588fe6d0..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Max_Heap.cpython-39.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Node.cpython-310.pyc b/utilities/data_structures/__pycache__/Node.cpython-310.pyc deleted file mode 100644 index c9f0893951f86c839ee3dd6c3d03c2d36bddb53d..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Node.cpython-310.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Node.cpython-39.pyc b/utilities/data_structures/__pycache__/Node.cpython-39.pyc deleted file mode 100644 index f43ea58c310ca5fef46e4e39e263442e8424a9f2..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Node.cpython-39.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Prioritised_Replay_Buffer.cpython-310.pyc b/utilities/data_structures/__pycache__/Prioritised_Replay_Buffer.cpython-310.pyc deleted file mode 100644 index d41984866c088ca4759e0077d7c7f852a3a5b7b3..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Prioritised_Replay_Buffer.cpython-310.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Prioritised_Replay_Buffer.cpython-39.pyc b/utilities/data_structures/__pycache__/Prioritised_Replay_Buffer.cpython-39.pyc deleted file mode 100644 index 6f6bbd854368505a7a7b598a92facae9e0a63f0e..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Prioritised_Replay_Buffer.cpython-39.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Replay_Buffer.cpython-310.pyc b/utilities/data_structures/__pycache__/Replay_Buffer.cpython-310.pyc deleted file mode 100644 index 0337d1648b176b0e5ff859fc31dce3c32b528a6d..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Replay_Buffer.cpython-310.pyc and /dev/null differ diff --git a/utilities/data_structures/__pycache__/Replay_Buffer.cpython-39.pyc b/utilities/data_structures/__pycache__/Replay_Buffer.cpython-39.pyc deleted file mode 100644 index 7de1a334d661427275ab60a095f6e21b496332ab..0000000000000000000000000000000000000000 Binary files a/utilities/data_structures/__pycache__/Replay_Buffer.cpython-39.pyc and /dev/null differ diff --git a/utilities/grammar_algorithms/k_Sequitur.py b/utilities/grammar_algorithms/k_Sequitur.py deleted file mode 100644 index 0de4ae671d77b4283ec986f7d20f28540755fa3a..0000000000000000000000000000000000000000 --- a/utilities/grammar_algorithms/k_Sequitur.py +++ /dev/null @@ -1,166 +0,0 @@ - -# My implementation of the k_Sequitur algorithm described in the papers: https://arxiv.org/pdf/cs/9709102.pdf -# and 
https://www.biorxiv.org/content/biorxiv/early/2018/03/13/281543.full.pdf -# The algorithm takes in a sequence and forms a grammar using two rules: -# 1) No pair of adjacent symbols appears more than k times in the grammar -# 2) Every rule in the grammar is used more than k times -# -# e.g. string "abddddeabde" with k=2 would turn to: -# "AdddBAB" -# R1: A --> ab -# R2: B --> de - -# TODO fix the fact that it sometimes provides rules that have end of episode symbol in them -# TODO add an option to return rules in terms of the amount of times they appear in a set of provided episodes - -from collections import defaultdict, Counter - - -class k_Sequitur(object): - - def __init__(self, k, end_of_episode_symbol="/"): - self.k = k - self.end_of_episode_symbol = end_of_episode_symbol - self.next_rule_name_ix = 0 - - def generate_action_grammar(self, actions): - """Generates a grammar given a list of actions""" - assert isinstance(actions, list), actions - assert not isinstance(actions[0], list), "Should be 1 long list of actions - {}".format(actions[0]) - assert len(actions) > 0, "Need to provide a list of at least 1 action" - assert isinstance(actions[0], int), "The actions should be integers" - new_actions, all_rules, rule_usage, rules_episode_appearance_count = self.discover_all_rules_and_new_actions_representation(actions) - action_usage = self.extract_action_usage_from_rule_usage(rule_usage, all_rules) - rules_episode_appearance_count = self.extract_action_usage_from_rule_usage(rules_episode_appearance_count, - all_rules) - return new_actions, all_rules, action_usage, rules_episode_appearance_count - - def discover_all_rules_and_new_actions_representation(self, actions): - """Takes in a list of actions and discovers all the rules present that get used more than self.k times and the - subsequent new actions list when all rules are applied recursively""" - all_rules = {} - current_actions = None - new_actions = actions - rule_usage = defaultdict(int) - num_episodes = Counter(actions)[self.end_of_episode_symbol] - rules_episode_appearance_tracker = {k: defaultdict(int) for k in range(num_episodes)} - - while new_actions != current_actions: - current_actions = new_actions - rules, reverse_rules = self.generate_1_layer_of_rules(current_actions) - all_rules.update(rules) - new_actions, rules_usage_count = self.convert_a_string_using_reverse_rules(current_actions, reverse_rules, - rules_episode_appearance_tracker) - for key in rules_usage_count.keys(): - rule_usage[key] += rules_usage_count[key] - - rules_episode_appearance_count = defaultdict(int) - - for episode in range(num_episodes): - rule_apperance_tracker = rules_episode_appearance_tracker[episode] - for key in rule_apperance_tracker.keys(): - if rule_apperance_tracker[key] == 1: - rules_episode_appearance_count[key] += 1 - - return new_actions, all_rules, rule_usage, rules_episode_appearance_count - - def generate_1_layer_of_rules(self, string): - """Generate dictionaries indicating the pair of symbols that appear next to each other more than self.k times""" - pairs_of_symbols = defaultdict(int) - last_pair = None - skip_next_symbol = False - rules = {} - - assert string[-1] == self.end_of_episode_symbol, "Final element of string must be self.end_of_episode_symbol {}".format(string) - - for ix in range(len(string) - 1): - # We skip the next symbol if it is already being used in a rule we just made - if skip_next_symbol: - skip_next_symbol = False - continue - - pair = (string[ix], string[ix+1]) - - # We don't count a pair if it was the 
previous pair (and therefore we have 3 of the same symbols in a row) - if pair != last_pair: - pairs_of_symbols[pair] += 1 - last_pair = pair - else: last_pair = None - if pairs_of_symbols[pair] >= self.k: - previous_pair = (string[ix-1], string[ix]) - pairs_of_symbols[previous_pair] -= 1 - skip_next_symbol = True - if pair not in rules.values() and self.end_of_episode_symbol not in pair: - rule_name = self.get_next_rule_name() - rules[rule_name] = pair - reverse_rules = {v: k for k, v in rules.items()} - return rules, reverse_rules - - def get_next_rule_name(self): - """Returns next rule name to use and increments count """ - next_rule_name = "R{}".format(self.next_rule_name_ix) - self.next_rule_name_ix += 1 - return next_rule_name - - def convert_symbol_to_raw_actions(self, symbol, rules): - """Converts a symbol back to the sequence of raw actions it represents""" - assert not isinstance(symbol, list) - assert isinstance(symbol, str) or isinstance(symbol, int) - symbol = [symbol] - finished = False - while not finished: - new_symbol = [] - for symbol_val in symbol: - if symbol_val in rules.keys(): - new_symbol.append(rules[symbol_val][0]) - new_symbol.append(rules[symbol_val][1]) - else: - new_symbol.append(symbol_val) - if new_symbol == symbol: finished = True - else: symbol = new_symbol - new_symbol = tuple(new_symbol) - return new_symbol - - def extract_action_usage_from_rule_usage(self, rule_usage, all_rules): - """Extracts the usage of each action (of 2 or more primitive actions) out from the usage of each rule""" - action_usage = {} - for key in rule_usage.keys(): - action_usage[self.convert_symbol_to_raw_actions(key, all_rules)] = rule_usage[key] - return action_usage - - def convert_a_string_using_reverse_rules(self, string, reverse_rules, rules_episode_appearance_tracker): - """Converts a string using the rules we have previously generated""" - new_string = [] - skip_next_element = False - rules_usage_count = defaultdict(int) - - episode = 0 - - rules_used_this_episode = [] - - for ix in range(len(string)): - if string[ix] == self.end_of_episode_symbol: - rules_used_this_episode = set(rules_used_this_episode) - for rule in rules_used_this_episode: - rules_episode_appearance_tracker[episode][rule] = 1 - rules_used_this_episode = [] - episode += 1 - - if skip_next_element: - skip_next_element = False - continue - # If is last element in string and wasn't just part of a pair then we add it to new string and finish - if ix == len(string) - 1: - new_string.append(string[ix]) - continue - pair = (string[ix], string[ix+1]) - if pair in reverse_rules.keys(): - result = reverse_rules[pair] - rules_usage_count[result] += 1 - rules_used_this_episode.append(result) - new_string.append(result) - skip_next_element = True - else: - new_string.append(string[ix]) - return new_string, rules_usage_count -
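For reference, a rough usage sketch of the removed k_Sequitur class, loosely mirroring the "abddddeabde", k=2 example from its header comments. It assumes the pre-deletion import path utilities.grammar_algorithms.k_Sequitur and a hypothetical integer encoding of the letters (a=0, b=1, d=3, e=4); the comments describe the return values of generate_action_grammar rather than verified output for this particular sequence.

from utilities.grammar_algorithms.k_Sequitur import k_Sequitur  # path as it existed before this deletion

grammar = k_Sequitur(k=2)

# One episode of integer actions (the class asserts ints), terminated by the end-of-episode symbol "/"
actions = [0, 1, 3, 3, 3, 3, 4, 0, 1, 3, 4, "/"]

new_actions, all_rules, action_usage, episode_counts = grammar.generate_action_grammar(actions)

print(new_actions)     # the action sequence with discovered rule names (e.g. "R0") substituted in
print(all_rules)       # mapping from rule names to the pair of symbols each rule expands to
print(action_usage)    # usage counts keyed by the fully expanded (primitive-action) tuples
print(episode_counts)  # count of episodes in which each rule appears, keyed the same way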