import math
import torch
from torch import nn
import torch.nn.functional as F
__all__ = ["get_act_layer", "get_activation"]
########################################### Layer ###########################################
_ACT_LAYER_DEFAULT = dict(
    relu=nn.ReLU,
    elu=nn.ELU,
    celu=nn.CELU,
    sigmoid=nn.Sigmoid,
    tanh=nn.Tanh,
)
def get_act_layer(name='none'):
    """Return the activation layer class registered under ``name``, or None if it is unknown."""
    if name in _ACT_LAYER_DEFAULT:
        return _ACT_LAYER_DEFAULT[name]
    else:
        return None
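# Usage sketch (not part of the module API; the ``nn.Identity`` fallback is an
# assumption about how a caller might handle unknown names):
#
#     act_cls = get_act_layer("relu")
#     act = act_cls() if act_cls is not None else nn.Identity()
#     out = act(torch.randn(4, 8))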
########################################### Function ###########################################
def swish(x):
    return x * torch.sigmoid(x)
def _gelu_python(x):
    """Original implementation of the gelu activation function in the Google BERT repo when initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
    This is now written in C in torch.nn.functional.
    Also see https://arxiv.org/abs/1606.08415
    """
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
gelu = getattr(F, "gelu", _gelu_python)
def gelu_new(x):
    """Implementation of the gelu activation function currently in the Google BERT repo (identical to OpenAI GPT).

    Also see https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
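# Sanity-check sketch: the tanh approximation tracks the erf-based gelu closely
# over a moderate input range (the tolerance below is an assumption, not a
# documented guarantee):
#
#     x = torch.linspace(-3.0, 3.0, steps=101)
#     assert torch.allclose(gelu(x), gelu_new(x), atol=1e-3)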
def mish(x):
    return x * torch.tanh(F.softplus(x))
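# Note: newer PyTorch releases expose these natively (e.g. ``F.silu`` for swish and
# ``F.mish``); the pure-Python versions above keep older releases working, mirroring
# the ``getattr`` fallback used for gelu.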
ACT2FN = {
    "relu": F.relu,
    "swish": swish,
    "gelu": gelu,
    "tanh": torch.tanh,
    "gelu_new": gelu_new,
    "mish": mish,
}
def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(
            "function {} not found in ACT2FN mapping {} or torch.nn.functional".format(
                activation_string, list(ACT2FN.keys())
            )
        )
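# Usage sketch (the tensor shape here is arbitrary):
#
#     act_fn = get_activation("gelu_new")
#     out = act_fn(torch.randn(4, 8))
#
# Unknown names raise KeyError, so callers can fall back explicitly if desired.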