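# NOTE: two "Runtime error" banner lines preceded this module when it was
# captured; they are page residue, not part of the program.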
from turtle import forward | |
import dgl | |
import math | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import dgl.function as fn | |
from dgl.nn.functional import edge_softmax | |
from FairGNN.src.models.GCN import GCN | |
from RHGN.layers import * | |
from RHGN.layers import RHGNLayer | |
class RHGN_adv(nn.Module):
    """Auxiliary module bundling a graph estimator and a linear adversary head.

    ``sens_model`` is a ``GCN`` applied to a graph and the first entry of
    ``inputs``; ``adv_model`` is a linear layer mapping a hidden
    representation of width ``n_hid`` to a single value.
    NOTE(review): judging by the attribute names, ``sens_model`` presumably
    estimates a sensitive attribute and ``adv_model`` is the adversary that
    tries to recover it -- confirm against the training script.

    Args:
        G, node_dict, edge_dict, n_inp, n_out, n_layers, n_heads: accepted
            for signature parity with the RHGN models; not used here.
        n_hid: input width of the adversary head.
        cid1_feature, cid2_feature, cid3_feature: 2-D tensors registered as
            frozen embedding tables. ``forward`` does not read them.
    """

    def __init__(self, G, node_dict, edge_dict, n_inp, n_hid, n_out, n_layers, n_heads,
                 cid1_feature, cid2_feature, cid3_feature):
        super().__init__()

        # ``from_pretrained`` wraps the given table directly instead of
        # allocating a randomly initialised table and overwriting it, so no
        # random numbers are drawn for these modules.
        self.cid1_feature = nn.Embedding.from_pretrained(cid1_feature, freeze=True)
        self.cid2_feature = nn.Embedding.from_pretrained(cid2_feature, freeze=True)
        self.cid3_feature = nn.Embedding.from_pretrained(cid3_feature, freeze=True)

        self.adv_model = nn.Linear(n_hid, 1)  # was n_out
        # NOTE(review): positional GCN arguments are presumably
        # (in_dim, hidden_dim, out_dim, dropout) -- verify against FairGNN's GCN.
        self.sens_model = GCN(200, 128, 1, 0.5)

    def forward(self, h, inputs, G, blocks, out_key, label_key, is_train=True, print_flag=False):
        """Run the estimator and the adversary head.

        Args:
            h: hidden representation produced by the main model; fed to
                ``adv_model``.
            inputs: indexable container; only ``inputs[0]`` is passed to
                ``sens_model``.
            G: graph handed to ``sens_model``.
            blocks, out_key, label_key, is_train: unused; kept so the call
                signature matches the RHGN models.
            print_flag: when true, print the graph and the tensor shapes
                (requires ``inputs`` to expose ``.shape``).

        Returns:
            Tuple ``(s, s_g)``: the ``sens_model`` output and the
            ``adv_model`` output.
        """
        if print_flag:
            print('graph:', G)
            print('inputs:', inputs.shape)

        s = self.sens_model(G, inputs[0])
        s_g = self.adv_model(h)

        if print_flag:
            print('s:', s.shape)
            print('s_g:', s_g.shape)
        return s, s_g
class ali_RHGN(nn.Module):
    """RHGN classifier over 'item' and 'user' nodes with category-aware items.

    Item features are blended with an attention summary of three pretrained
    category embeddings, projected per node type, passed through
    ``n_layers`` ``RHGNLayer`` blocks and finally through a linear output
    layer.
    NOTE(review): the ``ali_`` prefix presumably refers to the Alibaba
    dataset -- confirm.

    Args:
        G: unused; kept for interface compatibility.
        node_dict: maps node-type name to an index into ``adapt_ws``; must
            contain ``'item'`` and ``'user'``.
        edge_dict: forwarded to every ``RHGNLayer``.
        n_inp: width of the per-node input features. The blend in
            ``forward`` adds an ``n_inp``-wide tensor to the 200-wide raw
            item feature, so in practice this has to be 200.
        n_hid: hidden width used by the adapters and the RHGN layers.
        n_out: width of the output layer.
        n_layers: number of ``RHGNLayer`` blocks.
        n_heads: forwarded to every ``RHGNLayer``.
        cid1_feature, cid2_feature, cid3_feature: 2-D tensors used as frozen
            embedding tables; rows must be 200 wide to match ``key``/``value``.
        use_norm: forwarded to every ``RHGNLayer``.
    """

    # Width expected by the query/key/value projections; also the constant
    # used to scale the attention scores.
    _FEATURE_DIM = 200

    def __init__(self, G, node_dict, edge_dict, n_inp, n_hid, n_out, n_layers, n_heads,
                 cid1_feature, cid2_feature, cid3_feature, use_norm=True):
        super().__init__()
        self.node_dict = node_dict
        self.edge_dict = edge_dict
        self.gcs = nn.ModuleList()
        self.n_inp = n_inp
        self.n_hid = n_hid
        self.n_out = n_out
        self.n_layers = n_layers

        # One input adapter per node type, indexed through ``node_dict``.
        self.adapt_ws = nn.ModuleList()
        for _ in range(len(node_dict)):
            self.adapt_ws.append(nn.Linear(n_inp, n_hid))
        for _ in range(n_layers):
            self.gcs.append(RHGNLayer(n_hid, n_hid, node_dict, edge_dict, n_heads, use_norm=use_norm))
        self.out = nn.Linear(n_hid, n_out)

        # ``from_pretrained`` wraps the given tables directly (no throwaway
        # random initialisation) and freezes them.
        self.cid1_feature = nn.Embedding.from_pretrained(cid1_feature, freeze=True)
        self.cid2_feature = nn.Embedding.from_pretrained(cid2_feature, freeze=True)
        self.cid3_feature = nn.Embedding.from_pretrained(cid3_feature, freeze=True)

        # Not used by ``forward``; kept so the parameter/state-dict layout is
        # unchanged.
        self.excitation = nn.Sequential(
            nn.Linear(3, 32, bias=False),
            nn.ReLU(),
            nn.Linear(32, 3, bias=False),
            nn.ReLU(),
        )

        self.query = nn.Linear(self._FEATURE_DIM, n_inp)
        self.key = nn.Linear(self._FEATURE_DIM, n_inp)
        self.value = nn.Linear(self._FEATURE_DIM, n_inp)
        # Learnable blend weight; passed through a sigmoid in ``forward``.
        self.skip = nn.Parameter(torch.ones(1))
        print('n_out:', self.n_out)

    def _fuse_category_features(self, item_feature, category_stack):
        """Blend raw item features with an attention summary of the categories.

        Args:
            item_feature: ``(N, 200)`` raw item features.
            category_stack: ``(N, C, 200)`` stacked category embeddings.

        Returns:
            ``alpha * mean_c(softmax_c(k . q) * v) + (1 - alpha) * item_feature``
            with ``alpha = sigmoid(self.skip)``.
        """
        k = self.key(category_stack)                  # (N, C, n_inp)
        v = self.value(category_stack)                # (N, C, n_inp)
        q = self.query(item_feature.unsqueeze(-2))    # (N, 1, n_inp)

        att_score = torch.einsum("bij,bjk->bik", k, q.transpose(1, 2))
        att_score = att_score / math.sqrt(self._FEATURE_DIM)  # (N, C, 1)
        att_score = torch.softmax(att_score, dim=1)           # normalise over categories

        alpha = torch.sigmoid(self.skip)                      # (1,)
        # The weighted values are averaged (not summed) over the category axis.
        summary = torch.mean(v * att_score, dim=-2)           # (N, n_inp)
        return alpha * summary + (1 - alpha) * item_feature

    def forward(self, input_nodes, output_nodes, blocks, out_key, label_key, is_train=True, print_flag=False):
        """Compute output scores and fetch the labels of the target nodes.

        Args:
            input_nodes, output_nodes: unused; kept for interface
                compatibility.
            blocks: sequence of graph blocks. ``blocks[0]`` supplies the
                source-node data (``'cid1'``, ``'cid2'``, ``'cid3'`` and
                ``'inp'`` for items, ``'inp'`` for users), ``blocks[i]`` is
                fed to the i-th RHGN layer and ``blocks[-1]`` supplies the
                labels.
            out_key: node type whose representation is classified.
            label_key: name of the label field on the destination nodes.
            is_train, print_flag: forwarded to every ``RHGNLayer``.

        Returns:
            Tuple ``(h_new, labels)``: output-layer scores for ``out_key``
            nodes and ``blocks[-1].dstnodes[out_key].data[label_key]``.
        """
        item_data = blocks[0].srcnodes['item'].data
        user_data = blocks[0].srcnodes['user'].data

        cid1_feature = self.cid1_feature(item_data['cid1'].unsqueeze(1))  # (N, 1, 200)
        cid2_feature = self.cid2_feature(item_data['cid2'].unsqueeze(1))  # (N, 1, 200)
        cid3_feature = self.cid3_feature(item_data['cid3'].unsqueeze(1))  # (N, 1, 200)
        # NOTE(review): the cid2/cid3 lookups are immediately replaced by the
        # cid1 embedding, so attention runs over three copies of cid1. Kept
        # exactly as found; confirm whether this ablation is intentional.
        cid2_feature = cid1_feature
        cid3_feature = cid1_feature

        item_feature = item_data['inp']
        user_feature = user_data['inp']

        # Stack the category embeddings along a new "category" axis.
        inputs = torch.cat((cid1_feature, cid2_feature, cid3_feature), 1)  # (N, 3, 200)
        item_feature = self._fuse_category_features(item_feature, inputs)

        h = {
            'item': F.gelu(self.adapt_ws[self.node_dict['item']](item_feature)),
            'user': F.gelu(self.adapt_ws[self.node_dict['user']](user_feature)),
        }
        for layer_idx in range(self.n_layers):
            h = self.gcs[layer_idx](blocks[layer_idx], h, is_train=is_train, print_flag=print_flag)

        h_new = self.out(h[out_key])
        labels = blocks[-1].dstnodes[out_key].data[label_key]
        return h_new, labels
class jd_RHGN(nn.Module):
    """RHGN classifier over 'item' and 'user' nodes with category-aware items.

    Item features are blended with an attention summary of three pretrained
    category embeddings, projected per node type, passed through
    ``n_layers`` ``RHGNLayer`` blocks and finally through a linear output
    layer.
    NOTE(review): the ``jd_`` prefix presumably refers to the JD dataset --
    confirm.

    Args:
        G: unused; kept for interface compatibility.
        node_dict: maps node-type name to an index into ``adapt_ws``; must
            contain ``'item'`` and ``'user'``.
        edge_dict: forwarded to every ``RHGNLayer``.
        n_inp: width of the per-node input features. The blend in
            ``forward`` adds an ``n_inp``-wide tensor to the 200-wide raw
            item feature, so in practice this has to be 200.
        n_hid: hidden width used by the adapters and the RHGN layers.
        n_out: width of the output layer.
        n_layers: number of ``RHGNLayer`` blocks.
        n_heads: forwarded to every ``RHGNLayer``.
        cid1_feature, cid2_feature, cid3_feature, cid4_feature: 2-D tensors
            used as frozen embedding tables. ``forward`` never reads the
            ``cid4`` table.
        use_norm: forwarded to every ``RHGNLayer``.
    """

    # Width expected by the query/key/value projections; also the constant
    # used to scale the attention scores.
    _FEATURE_DIM = 200

    def __init__(self, G, node_dict, edge_dict, n_inp, n_hid, n_out, n_layers, n_heads, cid1_feature, cid2_feature,
                 cid3_feature, cid4_feature, use_norm=True, ):
        super().__init__()
        self.node_dict = node_dict
        self.edge_dict = edge_dict
        self.gcs = nn.ModuleList()
        self.n_inp = n_inp
        self.n_hid = n_hid
        self.n_out = n_out
        self.n_layers = n_layers

        # One input adapter per node type, indexed through ``node_dict``.
        self.adapt_ws = nn.ModuleList()
        for _ in range(len(node_dict)):
            self.adapt_ws.append(nn.Linear(n_inp, n_hid))
        for _ in range(n_layers):
            self.gcs.append(RHGNLayer(n_hid, n_hid, node_dict, edge_dict, n_heads, use_norm=use_norm))
        self.out = nn.Linear(n_hid, n_out)

        # ``from_pretrained`` wraps the given tables directly (no throwaway
        # random initialisation) and freezes them.
        self.cid1_feature = nn.Embedding.from_pretrained(cid1_feature, freeze=True)
        self.cid2_feature = nn.Embedding.from_pretrained(cid2_feature, freeze=True)
        self.cid3_feature = nn.Embedding.from_pretrained(cid3_feature, freeze=True)
        self.cid4_feature = nn.Embedding.from_pretrained(cid4_feature, freeze=True)

        # Not used by ``forward``; kept so the parameter/state-dict layout is
        # unchanged.
        self.excitation = nn.Sequential(
            nn.Linear(4, 32, bias=False),
            nn.ReLU(),
            nn.Linear(32, 4, bias=False),
            nn.ReLU(),
        )

        self.query = nn.Linear(self._FEATURE_DIM, n_inp)
        self.key = nn.Linear(self._FEATURE_DIM, n_inp)
        self.value = nn.Linear(self._FEATURE_DIM, n_inp)
        # Learnable blend weight; passed through a sigmoid in ``forward``.
        self.skip = nn.Parameter(torch.ones(1))

        # Per-category projections; not used by ``forward``.
        self.l1 = nn.Linear(self._FEATURE_DIM, n_inp)
        self.l2 = nn.Linear(self._FEATURE_DIM, n_inp)
        self.l3 = nn.Linear(self._FEATURE_DIM, n_inp)
        self.l4 = nn.Linear(self._FEATURE_DIM, n_inp)

    def _fuse_category_features(self, item_feature, category_stack):
        """Blend raw item features with an attention summary of the categories.

        Args:
            item_feature: ``(N, 200)`` raw item features.
            category_stack: ``(N, C, 200)`` stacked category embeddings.

        Returns:
            ``alpha * mean_c(softmax_c(k . q) * v) + (1 - alpha) * item_feature``
            with ``alpha = sigmoid(self.skip)``.
        """
        k = self.key(category_stack)                  # (N, C, n_inp)
        v = self.value(category_stack)                # (N, C, n_inp)
        q = self.query(item_feature.unsqueeze(-2))    # (N, 1, n_inp)

        att_score = torch.einsum("bij,bjk->bik", k, q.transpose(1, 2))
        att_score = att_score / math.sqrt(self._FEATURE_DIM)  # (N, C, 1)
        att_score = torch.softmax(att_score, dim=1)           # normalise over categories

        alpha = torch.sigmoid(self.skip)                      # (1,)
        # The weighted values are averaged (not summed) over the category axis.
        summary = torch.mean(v * att_score, dim=-2)           # (N, n_inp)
        return alpha * summary + (1 - alpha) * item_feature

    def forward(self, input_nodes, output_nodes, blocks, out_key, label_key, is_train=True, print_flag=False):
        """Compute output scores and fetch the labels of the target nodes.

        Args:
            input_nodes, output_nodes: unused; kept for interface
                compatibility.
            blocks: sequence of graph blocks. ``blocks[0]`` supplies the
                source-node data (``'cid1'``, ``'cid2'``, ``'cid3'`` and
                ``'inp'`` for items, ``'inp'`` for users), ``blocks[i]`` is
                fed to the i-th RHGN layer and ``blocks[-1]`` supplies the
                labels.
            out_key: node type whose representation is classified.
            label_key: name of the label field on the destination nodes.
            is_train, print_flag: forwarded to every ``RHGNLayer``.

        Returns:
            Tuple ``(h, labels)``: output-layer scores for ``out_key`` nodes
            and ``blocks[-1].dstnodes[out_key].data[label_key]``.
        """
        item_data = blocks[0].srcnodes['item'].data
        user_data = blocks[0].srcnodes['user'].data

        cid1_feature = self.cid1_feature(item_data['cid1'].unsqueeze(1))  # (N, 1, 200)
        cid2_feature = self.cid2_feature(item_data['cid2'].unsqueeze(1))  # (N, 1, 200)
        cid3_feature = self.cid3_feature(item_data['cid3'].unsqueeze(1))  # (N, 1, 200)
        # NOTE(review): the cid2/cid3 lookups are immediately replaced by the
        # cid1 embedding, so attention runs over three copies of cid1. Kept
        # exactly as found; confirm whether this ablation is intentional.
        cid2_feature = cid1_feature
        cid3_feature = cid1_feature

        item_feature = item_data['inp']
        user_feature = user_data['inp']

        # Stack the category embeddings along a new "category" axis.
        inputs = torch.cat((cid1_feature, cid2_feature, cid3_feature), 1)  # (N, 3, 200)
        item_feature = self._fuse_category_features(item_feature, inputs)

        h = {
            'item': F.gelu(self.adapt_ws[self.node_dict['item']](item_feature)),
            'user': F.gelu(self.adapt_ws[self.node_dict['user']](user_feature)),
        }
        for layer_idx in range(self.n_layers):
            h = self.gcs[layer_idx](blocks[layer_idx], h, is_train=is_train, print_flag=print_flag)

        h = self.out(h[out_key])
        labels = blocks[-1].dstnodes[out_key].data[label_key]
        return h, labels