import torch
import torch.nn as nn
import json

def attention(Q, K, V):
	"""Scaled dot-product attention: softmax(Q K^T / sqrt(d)) V."""
	d = K.shape[-1]                        # key dimension
	QK = Q @ K.transpose(-2, -1)           # (..., seq_len, seq_len) similarity scores
	QK_d = QK / (d ** 0.5)                 # scale to keep the softmax well-behaved
	weights = torch.softmax(QK_d, dim=-1)  # attention weights over the keys
	outputs = weights @ V                  # weighted sum of the values
	return outputs

class Attention(nn.Module):
	def __init__(self, emb_dim, n_heads):
		super(Attention, self).__init__()

		self.emb_dim = emb_dim
		self.n_heads = n_heads

	def forward(self, X):

		batch_size, seq_len, emb_dim = X.size() # (batch_size, seq_len, emb_dim)
		n_heads = self.n_heads
		emb_dim_per_head = emb_dim // n_heads

		assert emb_dim == self.emb_dim
		assert emb_dim_per_head * n_heads == emb_dim

		# Split the embedding dimension into heads:
		# (batch_size, seq_len, emb_dim) -> (batch_size, n_heads, seq_len, emb_dim_per_head)
		X = X.view(batch_size, seq_len, n_heads, emb_dim_per_head).transpose(1, 2)
		output = attention(X, X, X)       # (batch_size, n_heads, seq_len, emb_dim_per_head)
		output = output.transpose(1, 2)   # (batch_size, seq_len, n_heads, emb_dim_per_head)
		output = output.contiguous().view(batch_size, seq_len, emb_dim)    # (batch_size, seq_len, emb_dim)

		return output

class ClassifierAttention(nn.Module):
	def __init__(self, vocab_size, emb_dim, padding_idx, hidden_size, n_layers, attention_heads, hidden_layer_units, dropout):
		super(ClassifierAttention, self).__init__()

		self.embedding = nn.Embedding(
			num_embeddings = vocab_size,
			embedding_dim = emb_dim,
			padding_idx = padding_idx
		)

		self.rnn_1 = nn.LSTM(
			emb_dim,
			hidden_size,
			n_layers,
			bidirectional = False,
			batch_first = True,
		)

		self.attention = Attention(hidden_size, attention_heads)

		self.rnn_2 = nn.LSTM(
			hidden_size,
			hidden_size,
			n_layers,
			bidirectional = False,
			batch_first = True,
		)

		self.dropout = nn.Dropout(dropout)

		hidden_layer_units = [hidden_size, *hidden_layer_units]
		self.hidden_layers = nn.ModuleList([])
		for in_unit, out_unit in zip(hidden_layer_units[:-1], hidden_layer_units[1:]):
			self.hidden_layers.append(nn.Linear(in_unit, out_unit))
			self.hidden_layers.append(nn.ReLU())
			self.hidden_layers.append(self.dropout)
		self.hidden_layers.append(nn.Linear(hidden_layer_units[-1], 1))

		self.sigmoid = nn.Sigmoid()
		
	def forward(self, x):
		# x: (batch_size, seq_len)

		out = self.embedding(x)     # (batch_size, seq_len, emb_dim)
		out, (hidden_state, cell_state) = self.rnn_1(out)   # out: (batch_size, seq_len, hidden_size)
		out = self.attention(out)    # (batch_size, seq_len, hidden_size)
		out = self.dropout(out)
		output, (hidden_state, cell_state) = self.rnn_2(out)
		# output:       (batch_size, seq_len, hidden_size)
		# hidden_state: (n_layers*n_direction, batch_size, hidden_size)
		# cell_state:   (n_layers*n_direction, batch_size, hidden_size)
		out = hidden_state[-1]      # final hidden state of the last layer: (batch_size, hidden_size)
		out = self.dropout(out)

		for layer in self.hidden_layers:
			out = layer(out)

		out = self.sigmoid(out) # (batch_size, 1)
		out = out.squeeze(-1)   # (batch_size)

		return out

def get_model(model_path, params_path):
	# params_path is assumed to point to a JSON list holding the positional
	# constructor arguments of ClassifierAttention, in the order defined above.
	with open(params_path, 'r') as f:
		params = json.load(f)

	model = ClassifierAttention(*params)
	model.load_state_dict(torch.load(model_path))
	model.eval()

	return model
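
if __name__ == "__main__":
	# Quick smoke test with made-up hyperparameters (illustrative only; not the
	# values stored in the real params JSON or model checkpoint).

	# Sanity-check the standalone attention helper on random tensors.
	Q = torch.randn(2, 4, 10, 16)           # (batch, heads, seq_len, d)
	assert attention(Q, Q, Q).shape == Q.shape

	# Build the classifier and push a random batch of token ids through it.
	model = ClassifierAttention(
		vocab_size = 10000,
		emb_dim = 128,
		padding_idx = 0,
		hidden_size = 256,               # must be divisible by attention_heads
		n_layers = 2,
		attention_heads = 4,
		hidden_layer_units = [128, 64],
		dropout = 0.3,
	)

	x = torch.randint(1, 10000, (8, 50))    # batch of 8 sequences, 50 token ids each
	with torch.no_grad():
		probs = model(x)
	print(probs.shape)                      # torch.Size([8]) -- one probability per sequence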