"""Utility helpers: reward normalisation, actor distributions, a shareable
Adam optimiser and conversion of composite action ids to primitive actions."""

import math
from abc import ABCMeta

import numpy as np
import torch
from nn_builder.pytorch.NN import NN
from torch.distributions import Categorical, normal, MultivariateNormal


def abstract(cls):
    """Rebuild ``cls`` with ``ABCMeta`` as its metaclass and return the new class.

    The new class keeps the original name, bases and a copy of the class
    namespace.
    """
    return ABCMeta(cls.__name__, cls.__bases__, dict(cls.__dict__))


def save_score_results(file_path, results):
    """Saves results as a numpy file at given path"""
    np.save(file_path, results)


def normalise_rewards(rewards):
    """Normalises rewards to mean 0 and standard deviation 1"""
    mean_reward = np.mean(rewards)
    std_reward = np.std(rewards)
    # 1e-8 added for stability (avoids division by zero for constant rewards)
    return (rewards - mean_reward) / (std_reward + 1e-8)


def create_actor_distribution(action_types, actor_output, action_size):
    """Creates a distribution that the actor can then use to randomly draw actions.

    Args:
        action_types: ``"DISCRETE"`` builds a ``Categorical`` distribution; any
            other value is treated as continuous and builds a ``Normal``.
        actor_output: tensor whose second dimension is ``action_size`` for
            discrete actions, or ``action_size * 2`` (means followed by
            standard deviations) for continuous actions.
        action_size: number of actions / action dimensions.

    Returns:
        A ``torch.distributions`` object to sample actions from.

    Raises:
        AssertionError: if the second dimension of ``actor_output`` has the
            wrong size.
        ValueError: if the means or standard deviations are still more than
            one-dimensional after squeezing.
    """
    if action_types == "DISCRETE":
        assert actor_output.size()[1] == action_size, "Actor output the wrong size"
        # actor_output is passed as the `probs` argument of Categorical
        action_distribution = Categorical(actor_output)
    else:
        assert actor_output.size()[1] == action_size * 2, "Actor output the wrong size"
        # First half of the columns are means, second half standard deviations
        means = actor_output[:, :action_size].squeeze(0)
        stds = actor_output[:, action_size:].squeeze(0)
        if len(means.shape) == 2:
            means = means.squeeze(-1)
        if len(stds.shape) == 2:
            stds = stds.squeeze(-1)
        if len(stds.shape) > 1 or len(means.shape) > 1:
            raise ValueError("Wrong mean and std shapes - {} -- {}".format(stds.shape, means.shape))
        # abs() keeps the scale non-negative whatever sign the network outputs
        action_distribution = normal.Normal(means.squeeze(0), torch.abs(stds))
    return action_distribution


class SharedAdam(torch.optim.Adam):
    """Creates an adam optimizer object that is shareable between processes. Useful for algorithms like A3C.

    Code taken from https://github.com/ikostrikov/pytorch-a3c/blob/master/my_optim.py

    The optimiser state (step counter and moment estimates) is created eagerly
    in ``__init__`` as tensors so that ``share_memory`` can move it into shared
    memory before any optimisation step has run.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps,
                                         weight_decay=weight_decay, amsgrad=amsgrad)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)
                if group['amsgrad']:
                    # step() reads this buffer when amsgrad is enabled, so it
                    # must exist before the first step
                    state['max_exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        """Moves every state tensor of every parameter into shared memory."""
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()
                if 'max_exp_avg_sq' in state:
                    state['max_exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` if no closure was given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                amsgrad = group['amsgrad']

                state = self.state[p]

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                # In-place increment so the (possibly shared) tensor is updated
                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsgrad:
                    # Maintains the maximum of all 2nd moment running avg. till now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step'].item()
                bias_correction2 = 1 - beta2 ** state['step'].item()
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                p.data.addcdiv_(exp_avg, denom, value=-step_size)
        return loss


def flatten_action_id_to_actions(action_id_to_actions, global_action_id_to_primitive_action, num_primitive_actions):
    """Converts the values in an action_id_to_actions dictionary back to the primitive actions they represent"""
    return {
        action_id: backtrack_action_to_primitive_actions(
            actions, global_action_id_to_primitive_action, num_primitive_actions)
        for action_id, actions in action_id_to_actions.items()
    }


def backtrack_action_to_primitive_actions(action_tuple, global_action_id_to_primitive_action, num_primitive_actions):
    """Converts an action tuple back to the primitive actions it represents in a recursive way.

    Args:
        action_tuple: sequence of action ids, primitive or composite.
        global_action_id_to_primitive_action: mapping from a composite action
            id to the sequence of action ids it stands for.
        num_primitive_actions: ids in ``range(num_primitive_actions)`` are
            primitive.

    Returns:
        ``action_tuple`` itself if it already contains only primitive actions,
        otherwise a tuple with every composite id expanded.

    Raises:
        KeyError: if a non-primitive id is missing from
            ``global_action_id_to_primitive_action``.
    """
    primitive_actions = range(num_primitive_actions)
    if all(action in primitive_actions for action in action_tuple):
        return action_tuple  # base case
    new_action_tuple = []
    for action in action_tuple:
        if action in primitive_actions:
            new_action_tuple.append(action)
        else:
            new_action_tuple.extend(global_action_id_to_primitive_action[action])
    # An expansion may itself contain composite ids, so recurse with the full
    # argument list until only primitive actions remain
    return backtrack_action_to_primitive_actions(
        tuple(new_action_tuple), global_action_id_to_primitive_action, num_primitive_actions)