# jam_shield_LLM_app/utilities/Utility_Functions.py
import math
import numpy as np
from abc import ABCMeta
import torch
from nn_builder.pytorch.NN import NN
from torch.distributions import Categorical, normal, MultivariateNormal


def abstract(cls):
    """Rebuilds cls as an abstract base class by giving it the ABCMeta metaclass."""
    return ABCMeta(cls.__name__, cls.__bases__, dict(cls.__dict__))
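

# Illustrative sketch (not part of the original module): the abstract decorator
# above rebuilds a class with ABCMeta as its metaclass, so any @abstractmethod
# the class declares blocks direct instantiation. The _demo_* helper is hypothetical.
def _demo_abstract():
    from abc import abstractmethod

    @abstract
    class Base(object):
        @abstractmethod
        def act(self):
            pass

    try:
        Base()
    except TypeError as error:
        print(error)  # e.g. "Can't instantiate abstract class Base ..."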


def save_score_results(file_path, results):
    """Saves results as a numpy file at the given path."""
    np.save(file_path, results)


def normalise_rewards(rewards):
    """Normalises rewards to mean 0 and standard deviation 1."""
    mean_reward = np.mean(rewards)
    std_reward = np.std(rewards)
    return (rewards - mean_reward) / (std_reward + 1e-8)  # 1e-8 added for numerical stability
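

# Illustrative sketch (not part of the original module): normalising a small
# reward batch gives mean ~0 and standard deviation ~1. The _demo_* helper is
# hypothetical and only for demonstration.
def _demo_normalise_rewards():
    rewards = np.array([1.0, 2.0, 3.0, 4.0])
    normalised = normalise_rewards(rewards)
    print(normalised.mean(), normalised.std())  # approximately 0.0 and 1.0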


def create_actor_distribution(action_types, actor_output, action_size):
    """Creates a distribution from which the actor can randomly draw actions."""
    if action_types == "DISCRETE":
        assert actor_output.size()[1] == action_size, "Actor output is the wrong size"
        action_distribution = Categorical(actor_output)  # distribution over the discrete actions
    else:
        assert actor_output.size()[1] == action_size * 2, "Actor output is the wrong size"
        means = actor_output[:, :action_size].squeeze(0)
        stds = actor_output[:, action_size:].squeeze(0)
        if len(means.shape) == 2: means = means.squeeze(-1)
        if len(stds.shape) == 2: stds = stds.squeeze(-1)
        if len(stds.shape) > 1 or len(means.shape) > 1:
            raise ValueError("Wrong mean and std shapes - {} -- {}".format(stds.shape, means.shape))
        action_distribution = normal.Normal(means.squeeze(0), torch.abs(stds))  # std must be positive
    return action_distribution
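

# Illustrative sketch (not part of the original module): drawing actions from the
# distributions built above. Per the assertions in create_actor_distribution, the
# actor output is (batch, action_size) for "DISCRETE" and (batch, 2 * action_size)
# otherwise. The _demo_* helper is hypothetical.
def _demo_create_actor_distribution():
    discrete_probs = torch.softmax(torch.randn(1, 4), dim=1)
    discrete_dist = create_actor_distribution("DISCRETE", discrete_probs, 4)
    print(discrete_dist.sample())  # a tensor holding one of the actions 0..3

    continuous_output = torch.randn(1, 2)  # one mean and one std for a 1-D action
    continuous_dist = create_actor_distribution("CONTINUOUS", continuous_output, 1)
    print(continuous_dist.sample())  # a sample from Normal(mean, |std|)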


class SharedAdam(torch.optim.Adam):
    """Creates an Adam optimizer object whose state is shareable between processes. Useful for algorithms like A3C.
    Code taken from https://github.com/ikostrikov/pytorch-a3c/blob/master/my_optim.py"""
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, amsgrad=amsgrad)
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)
                if group['amsgrad']:
                    state['max_exp_avg_sq'] = torch.zeros_like(p.data)  # must exist before step() reads it
    def share_memory(self):
        """Moves the optimizer state tensors into shared memory so worker processes see the same state."""
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()
                if group['amsgrad']:
                    state['max_exp_avg_sq'].share_memory_()

def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
loss = closure()
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.data
amsgrad = group['amsgrad']
state = self.state[p]
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
if amsgrad:
max_exp_avg_sq = state['max_exp_avg_sq']
beta1, beta2 = group['betas']
state['step'] += 1
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])
                # Decay the first and second moment running average coefficients
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
if amsgrad:
# Maintains the maximum of all 2nd moment running avg. till now
torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
# Use the max. for normalizing running avg. of gradient
denom = max_exp_avg_sq.sqrt().add_(group['eps'])
else:
denom = exp_avg_sq.sqrt().add_(group['eps'])
bias_correction1 = 1 - beta1 ** state['step'].item()
bias_correction2 = 1 - beta2 ** state['step'].item()
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                p.data.addcdiv_(exp_avg, denom, value=-step_size)
return loss
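

# Illustrative sketch (not part of the original module): the usual A3C-style
# pattern - build one SharedAdam over a shared model's parameters, call
# share_memory() once, then hand the optimizer to the worker processes.
# The _demo_* helper and the Linear model are hypothetical.
def _demo_shared_adam():
    model = torch.nn.Linear(4, 2)
    model.share_memory()  # put the weights in shared memory for the workers
    optimizer = SharedAdam(model.parameters(), lr=1e-3)
    optimizer.share_memory()  # put the Adam state tensors in shared memory too
    loss = model(torch.randn(1, 4)).sum()
    loss.backward()
    optimizer.step()  # one optimization step against the shared state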


def flatten_action_id_to_actions(action_id_to_actions, global_action_id_to_primitive_action, num_primitive_actions):
    """Converts the values in an action_id_to_actions dictionary back to the primitive actions they represent."""
    flattened_action_id_to_actions = {}
    for key, actions in action_id_to_actions.items():
        raw_actions = backtrack_action_to_primitive_actions(actions, global_action_id_to_primitive_action, num_primitive_actions)
        flattened_action_id_to_actions[key] = raw_actions
    return flattened_action_id_to_actions


def backtrack_action_to_primitive_actions(action_tuple, global_action_id_to_primitive_action, num_primitive_actions):
    """Recursively converts an action tuple back to the primitive actions it represents."""
    print("Recursing to backtrack on ", action_tuple)
    primitive_actions = range(num_primitive_actions)
    if all(action in primitive_actions for action in action_tuple): return action_tuple  # base case
    new_action_tuple = []
    for action in action_tuple:
        if action in primitive_actions: new_action_tuple.append(action)
        else:
            converted_action = global_action_id_to_primitive_action[action]
            new_action_tuple.extend(converted_action)
    new_action_tuple = tuple(new_action_tuple)
    return backtrack_action_to_primitive_actions(new_action_tuple, global_action_id_to_primitive_action,
                                                 num_primitive_actions)
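

# Illustrative sketch (not part of the original module): with three primitive
# actions (0, 1, 2) and macro-action 3 defined as (0, 1), the macro-action
# (3, 2) flattens back to its primitives. The _demo_* helper is hypothetical.
def _demo_flatten_action_id_to_actions():
    global_map = {3: (0, 1)}
    action_id_to_actions = {4: (3, 2)}
    print(flatten_action_id_to_actions(action_id_to_actions, global_map, 3))  # {4: (0, 1, 2)}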