import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer
import numpy as np
class SeqNetwork(nn.Module):
    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_dim,
                 lstm_in,
                 n_layers,
                 skip_in=(4,),
                 weight_norm=True,
                 freeze=False,
                 use_LSTM=False):
        super(SeqNetwork, self).__init__()
        self.freeze = freeze
        self.skip_in = skip_in
        self.num_layers = n_layers
        self.weight_norm = weight_norm
        self.use_LSTM = use_LSTM

        # BERT encoder, optionally frozen so only the head is trained
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        if self.freeze:
            for param in self.bert.parameters():
                param.requires_grad = False

        # Optional LSTM over the token-level BERT embeddings
        self.lstm = nn.LSTM(input_size=lstm_in, hidden_size=input_dim, batch_first=True)

        # MLP head: input_dim -> hidden_dim x n_layers -> output_dim
        dims = [input_dim] + [hidden_dim for _ in range(n_layers)] + [output_dim]
        for l in range(0, self.num_layers + 1):
            if l in self.skip_in:
                # This layer receives [x, inputs] concatenated in forward(),
                # so widen its input by the size of the skip connection.
                in_dim = dims[l] + dims[0]
            else:
                in_dim = dims[l]
            lin = nn.Linear(in_dim, dims[l + 1])
            if weight_norm:
                lin = nn.utils.weight_norm(lin)
            setattr(self, "lin" + str(l), lin)

        self.activation = nn.ReLU()
    def forward(self, input_ids, attention_mask):
        # BERT embeddings
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        bert_sequence_output = bert_outputs.pooler_output  # Shape: (batch_size, hidden_size)

        if self.use_LSTM:
            # LSTM over the token-level BERT embeddings,
            # shape (batch_size, seq_len, hidden_size)
            token_embeddings = bert_outputs.last_hidden_state
            lstm_out, (h_n, c_n) = self.lstm(token_embeddings)
            inputs = h_n[-1]  # Use the last hidden state, shape (batch_size, input_dim)
        else:
            inputs = bert_sequence_output

        # MLP head with skip connections back to the BERT features
        x = inputs
        for l in range(0, self.num_layers + 1):
            lin = getattr(self, "lin" + str(l))
            if l in self.skip_in:
                x = torch.cat([x, inputs], 1) / np.sqrt(2)
            x = lin(x)
            if l < self.num_layers:
                x = self.activation(x)

        bert_feature = x  # Shape: (batch_size, output_dim)
        return bert_feature
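

# Minimal usage sketch (not part of the original file). All dimensions below are
# illustrative assumptions; the only real constraints are that lstm_in and, for the
# non-LSTM path, input_dim match the 768-d hidden size of bert-base-uncased.
if __name__ == "__main__":
    from transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = SeqNetwork(
        input_dim=768,    # width of the features fed to the MLP / LSTM hidden size
        output_dim=128,   # assumed size of the returned feature vector
        hidden_dim=1024,  # assumed MLP width
        lstm_in=768,      # token embeddings from bert-base-uncased are 768-d
        n_layers=4,       # with skip_in=(4,), the last layer gets the skip connection
        freeze=True,
        use_LSTM=False,
    )

    batch = tokenizer(
        ["a short example sentence", "another example"],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        features = model(batch["input_ids"], batch["attention_mask"])
    print(features.shape)  # torch.Size([2, 128])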