import os

import torch
from torch import nn
from transformers import AutoModel
from huggingface_hub import hf_hub_download

# Hugging Face access token, needed to download the fine-tuned weights.
token = os.getenv("HF_TOKEN")
repo_id = "Siyunb323/CreativityEvaluation"

# Shared Japanese BERT encoder used by all fine-tuned heads below.
model = AutoModel.from_pretrained("cl-tohoku/bert-base-japanese")
class BERTregressor(nn.Module):
    """One-phase model: a BERT encoder with a single regression head."""

    def __init__(self, bert, hidden_size=768, num_linear=1, dropout=0.1,
                 o_type='cls', t_type='C', use_sigmoid=False):
        super(BERTregressor, self).__init__()
        self.encoder = bert
        self.o_type = o_type
        self.t_type = t_type
        self.sigmoid = use_sigmoid
        if num_linear == 2:
            layers = [nn.Linear(hidden_size, 128),
                      nn.ReLU(),
                      nn.Dropout(dropout),
                      nn.Linear(128, 1)]
        elif num_linear == 1:
            layers = [nn.Dropout(dropout),
                      nn.Linear(hidden_size, 1)]
        else:
            raise ValueError("num_linear must be 1 or 2")
        if use_sigmoid:
            layers.append(nn.Sigmoid())
        self.output = nn.Sequential(*layers)

    def forward(self, inputs, return_attention=False):
        X = {'input_ids': inputs['input_ids'],
             'token_type_ids': inputs['token_type_ids'],
             'attention_mask': inputs['attention_mask'],
             'output_attentions': return_attention}
        encoded_X = self.encoder(**X)
        if self.o_type == 'cls':
            # Regress from the hidden state of the [CLS] token.
            output = self.output(encoded_X.last_hidden_state[:, 0, :])
        elif self.o_type == 'pooler':
            # Regress from BERT's pooled output.
            output = self.output(encoded_X.pooler_output)
        # With a Sigmoid head and a creativity target, rescale (0, 1) to (1, 5).
        if self.sigmoid and self.t_type == 'C':
            output = 4 * output.squeeze(-1) + 1
        else:
            output = output.squeeze(-1)
        return output if not return_attention else (output, encoded_X.attentions)
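
# --- Example (sketch) --------------------------------------------------------
# A minimal, hedged usage sketch for BERTregressor, defined as a function so it
# never runs on import. The tokenizer checkpoint and sample sentence are
# illustrative assumptions, not part of the original module;
# cl-tohoku/bert-base-japanese additionally needs `fugashi` and `unidic-lite`.
def _demo_bertregressor():
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
    net = BERTregressor(model, o_type='cls', t_type='C', use_sigmoid=True)
    net.eval()
    inputs = tokenizer("これはテスト文です。", return_tensors="pt")
    with torch.no_grad():
        score = net(inputs)  # shape (1,), rescaled to the 1-5 range
    print(float(score))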
class Effectiveness(nn.Module):
    """Effectiveness regression head applied to a BERT sentence encoding."""

    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
        super(Effectiveness, self).__init__(**kwargs)
        self.sigmoid = use_sigmoid
        if num_layers == 2:
            layers = [
                nn.Linear(hidden_size, 128),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(128, 1)
            ]
        else:
            layers = [
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(hidden_size, 1)
            ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())  # add a Sigmoid layer only when needed
        self.output = nn.Sequential(*layers)

    def forward(self, X):
        output = self.output(X)
        # When a Sigmoid layer is used, rescale the output range to [1, 5].
        if self.sigmoid:
            return 4 * output.squeeze(-1) + 1
        else:
            return output.squeeze(-1)
class Creativity(nn.Module):
    """Creativity regression head applied to a BERT sentence encoding."""

    def __init__(self, num_layers, hidden_size=768, use_sigmoid=True, dropout=0.2, **kwargs):
        super(Creativity, self).__init__(**kwargs)
        self.sigmoid = use_sigmoid
        if num_layers == 2:
            layers = [
                nn.Linear(hidden_size, 128),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(128, 1)
            ]
        else:
            layers = [
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(hidden_size, 1)
            ]
        if use_sigmoid:
            layers.append(nn.Sigmoid())  # add a Sigmoid layer only when needed
        self.output = nn.Sequential(*layers)

    def forward(self, X):
        output = self.output(X)
        # When a Sigmoid layer is used, rescale the output range to [1, 5].
        if self.sigmoid:
            return 4 * output.squeeze(-1) + 1
        else:
            return output.squeeze(-1)
class BERT2Phase(nn.Module):
    """Two-phase model: one BERT encoder with separate creativity and effectiveness heads."""

    def __init__(self, bert, hidden_size=768, type='cls',
                 num_linear=1, dropout=0.1, use_sigmoid=False):
        super(BERT2Phase, self).__init__()
        self.encoder = bert
        self.type = type
        self.sigmoid = use_sigmoid
        self.effectiveness = Effectiveness(num_linear, hidden_size, use_sigmoid, dropout)
        self.creativity = Creativity(num_linear, hidden_size, use_sigmoid, dropout)

    def forward(self, inputs, return_attention=False):
        X = {'input_ids': inputs['input_ids'],
             'token_type_ids': inputs['token_type_ids'],
             'attention_mask': inputs['attention_mask'],
             'output_attentions': return_attention}
        encoded_X = self.encoder(**X)
        if self.type == 'cls':
            e_pred = self.effectiveness(encoded_X.last_hidden_state[:, 0, :])
            c_pred = self.creativity(encoded_X.last_hidden_state[:, 0, :])
        elif self.type == 'pooler':
            e_pred = self.effectiveness(encoded_X.pooler_output)
            c_pred = self.creativity(encoded_X.pooler_output)
        return (c_pred, e_pred) if not return_attention else (c_pred, e_pred, encoded_X.attentions)
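
# --- Example (sketch) --------------------------------------------------------
# A hedged sketch of running the two-phase model; the tokenizer checkpoint and
# sentence are illustrative assumptions. The forward pass returns the pair
# (creativity, effectiveness).
def _demo_bert2phase():
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
    net = BERT2Phase(model, type='cls', use_sigmoid=True)
    net.eval()
    inputs = tokenizer("これはテスト文です。", return_tensors="pt")
    with torch.no_grad():
        c_pred, e_pred = net(inputs)
    print(float(c_pred), float(e_pred))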
def load_model(model_name, pooling_method):
    pooling = pooling_method if pooling_method == 'cls' else 'pooler'
    if model_name == "One-phase Fine-tuned BERT":
        loaded_net = BERTregressor(model, hidden_size=768, num_linear=1, dropout=0.1,
                                   o_type=pooling, t_type='C', use_sigmoid=True)
        filename = 'model' + f"/OnePhase_BERT_{pooling_method}.pth"
    elif model_name == "Two-phase Fine-tuned BERT":
        loaded_net = BERT2Phase(model, hidden_size=768, num_linear=1, dropout=0.1,
                                type=pooling, use_sigmoid=True)
        filename = 'model' + f"/TwoPhase_BERT_{pooling_method}.pth"
    else:
        raise ValueError(f"Unknown model name: {model_name}")
    # `use_auth_token` is deprecated in huggingface_hub; pass the token directly.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=token)
    loaded_net.load_state_dict(torch.load(model_path, map_location='cpu'))
    loaded_net.eval()
    return loaded_net
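
# --- Example (sketch) --------------------------------------------------------
# A hedged end-to-end sketch. Downloading the fine-tuned weights requires an
# HF_TOKEN with access to Siyunb323/CreativityEvaluation; the model name below
# is one of the two strings load_model() accepts, and the sample sentence is
# an illustrative assumption.
if __name__ == "__main__":
    from transformers import AutoTokenizer
    net = load_model("One-phase Fine-tuned BERT", "cls")
    tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
    inputs = tokenizer("これはテスト文です。", return_tensors="pt")
    with torch.no_grad():
        print("creativity:", float(net(inputs)))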