#!/usr/bin/env python

# Copyright 2019 Kyoto University (Hirofumi Inaguma)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

"""Initialization functions for RNN sequence-to-sequence models."""

import math


def lecun_normal_init_parameters(module):
    """Initialize parameters in LeCun's manner."""
    for p in module.parameters():
        data = p.data
        if data.dim() == 1:
            # bias
            data.zero_()
        elif data.dim() == 2:
            # linear weight: fan-in is the number of input features
            n = data.size(1)
            stdv = 1.0 / math.sqrt(n)
            data.normal_(0, stdv)
        elif data.dim() in (3, 4):
            # conv weight: fan-in is in_channels * prod(kernel_size)
            n = data.size(1)
            for k in data.size()[2:]:
                n *= k
            stdv = 1.0 / math.sqrt(n)
            data.normal_(0, stdv)
        else:
            raise NotImplementedError


def uniform_init_parameters(module):
    """Initialize parameters with a uniform distribution."""
    for p in module.parameters():
        data = p.data
        if data.dim() == 1:
            # bias
            data.uniform_(-0.1, 0.1)
        elif data.dim() == 2:
            # linear weight
            data.uniform_(-0.1, 0.1)
        elif data.dim() in (3, 4):
            # conv weight
            pass  # use the PyTorch default
        else:
            raise NotImplementedError


def set_forget_bias_to_one(bias):
    """Initialize a bias vector in the forget gate with one."""
    # PyTorch lays out LSTM gate biases in the order (input, forget,
    # cell, output), so the forget gate occupies the second quarter.
    n = bias.size(0)
    start, end = n // 4, n // 2
    bias.data[start:end].fill_(1.0)
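

if __name__ == "__main__":
    # A minimal usage sketch, not part of the original module: build a
    # small hypothetical encoder, apply the LeCun initializer, and set
    # the LSTM forget-gate biases to one. `idim` and `hdim` are
    # illustrative sizes, not values taken from ESPnet.
    import torch

    idim, hdim = 40, 320
    model = torch.nn.Sequential(
        torch.nn.Linear(idim, hdim),
        torch.nn.LSTM(hdim, hdim, batch_first=True),
    )
    lecun_normal_init_parameters(model)
    # uniform_init_parameters(model) would instead draw weights from
    # U(-0.1, 0.1).
    lstm = model[1]
    set_forget_bias_to_one(lstm.bias_ih_l0)
    set_forget_bias_to_one(lstm.bias_hh_l0)
    # The linear weight should now have a std close to 1/sqrt(idim).
    print(model[0].weight.std().item(), 1.0 / math.sqrt(idim))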