import math

import torch
from torch import nn
import torch.nn.functional as F

__all__ = ["get_act_layer", "get_activation"]


########################################### Layer ###########################################

# Mapping from activation name to the corresponding nn.Module class.
_ACT_LAYER_DEFAULT = dict(
    relu=nn.ReLU,
    elu=nn.ELU,
    celu=nn.CELU,
    sigmoid=nn.Sigmoid,
    tanh=nn.Tanh,
)


def get_act_layer(name='none'):
    """Return the nn.Module class registered under `name`, or None if unknown."""
    if name in _ACT_LAYER_DEFAULT:
        return _ACT_LAYER_DEFAULT[name]
    return None


########################################### Function ###########################################

def swish(x):
    """Swish / SiLU activation: x * sigmoid(x)."""
    return x * torch.sigmoid(x)


def _gelu_python(x):
    """Original implementation of the gelu activation function in the Google BERT repo
    when initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
    This is now written in C in torch.nn.functional.
    Also see https://arxiv.org/abs/1606.08415
    """
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))


# Prefer the native torch.nn.functional implementation when available,
# fall back to the pure-Python version otherwise.
gelu = getattr(F, "gelu", _gelu_python)


def gelu_new(x):
    """Implementation of the gelu activation function currently in the Google BERT repo
    (identical to OpenAI GPT).

    Also see https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))


def mish(x):
    """Mish activation: x * tanh(softplus(x))."""
    return x * torch.tanh(F.softplus(x))


# Mapping from activation name to the corresponding functional form.
# torch.tanh is used instead of the deprecated F.tanh.
ACT2FN = {
    "relu": F.relu,
    "swish": swish,
    "gelu": gelu,
    "tanh": torch.tanh,
    "gelu_new": gelu_new,
    "mish": mish,
}


def get_activation(activation_string):
    """Return the activation function registered under `activation_string`."""
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    raise KeyError(
        "function {} not found in ACT2FN mapping {}".format(
            activation_string, list(ACT2FN.keys())
        )
    )
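

# Minimal usage sketch (illustrative, not part of the original module): shows how the
# layer lookup and the functional lookup are intended to be used together. The tensor
# shape and the activation names chosen below are arbitrary example values.
if __name__ == "__main__":
    x = torch.randn(2, 3)

    # Functional form: get_activation returns a plain callable such as F.relu or gelu.
    act_fn = get_activation("gelu")
    print(act_fn(x))

    # Module form: get_act_layer returns an nn.Module class (or None for unknown names),
    # so it has to be instantiated before being applied.
    act_layer_cls = get_act_layer("relu")
    if act_layer_cls is not None:
        print(act_layer_cls()(x))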