File size: 1,561 Bytes
ad16788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python

# Copyright 2019 Kyoto University (Hirofumi Inaguma)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

"""Initialization functions for RNN sequence-to-sequence models."""

import math


def lecun_normal_init_parameters(module):
    """Initialize parameters in the LeCun's manner."""
    for p in module.parameters():
        data = p.data
        if data.dim() == 1:
            # bias
            data.zero_()
        elif data.dim() == 2:
            # linear weight
            n = data.size(1)
            stdv = 1.0 / math.sqrt(n)
            data.normal_(0, stdv)
        elif data.dim() in (3, 4):
            # conv weight
            n = data.size(1)
            for k in data.size()[2:]:
                n *= k
            stdv = 1.0 / math.sqrt(n)
            data.normal_(0, stdv)
        else:
            raise NotImplementedError


def uniform_init_parameters(module):
    """Initialize parameters with an uniform distribution."""
    for p in module.parameters():
        data = p.data
        if data.dim() == 1:
            # bias
            data.uniform_(-0.1, 0.1)
        elif data.dim() == 2:
            # linear weight
            data.uniform_(-0.1, 0.1)
        elif data.dim() in (3, 4):
            # conv weight
            pass  # use the pytorch default
        else:
            raise NotImplementedError


def set_forget_bias_to_one(bias):
    """Initialize a bias vector in the forget gate with one."""
    n = bias.size(0)
    start, end = n // 4, n // 2
    bias.data[start:end].fill_(1.0)