# Snapshot metadata: file size 6,827 bytes, revision 75c583d.
import os
os.chdir("/data/public/GenNet")
import sys
import glob
import numpy as np
import pandas as pd
#sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import matplotlib
matplotlib.use('agg')
import tensorflow as tf
import tensorflow.keras as K
import scipy
import tables
# Set the fuzz factor (epsilon) used by the Keras backend.
tf.keras.backend.set_epsilon(0.0000001)


def _version_tuple(version):
    """Return the leading numeric ``(major, minor, patch)`` of a version string.

    Only the leading digits of each of the first three dot-separated parts
    are used, so suffixes such as ``-rc1`` are ignored and missing parts
    count as 0 (``"2.0"`` -> ``(2, 0, 0)``, ``"1.13.1"`` -> ``(1, 13, 1)``).
    """
    numbers = []
    for piece in str(version).split('.')[:3]:
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    numbers.extend([0] * (3 - len(numbers)))
    return tuple(numbers)


# Pick the LocallyDirected1D implementation that matches the installed
# TensorFlow. Versions are compared as integer tuples: comparing the raw
# strings is lexicographic and misorders e.g. '1.9.0' and '1.13.1'.
tf_version = tf.__version__
_tf_version_tuple = _version_tuple(tf_version)
if _tf_version_tuple <= (1, 13, 1):
    from GenNet_utils.LocallyDirectedConnected import LocallyDirected1D
    print('= or less then 1.13.1: tensorflow version is', tf_version)
elif _tf_version_tuple >= (2, 0, 0):
    from GenNet_utils.LocallyDirectedConnected_tf2 import LocallyDirected1D
    print('= or more then 2.0: tensorflow version is', tf_version)
else:
    # Versions between 1.13.1 and 2.0 fall back to the TF2 implementation.
    print("unexpected tensorflow version")
    from GenNet_utils.LocallyDirectedConnected_tf2 import LocallyDirected1D

# Study name; used by get_testdata to build the HDF5 file name.
studyname = 'test_GTEx'
def layer_block(model, mask, i, regression):
    """Stack one masked layer, an activation and a batch normalisation on ``model``.

    Args:
        model: Tensor the block is applied to.
        mask: Connectivity mask handed to ``LocallyDirected1D``; its
            ``shape[0]`` is passed as the declared input length.
        i: Index of the block, used in the layer name ``LocallyDirected_<i>``.
        regression: If truthy the activation is "relu", otherwise "tanh".

    Returns:
        The output tensor of the batch-normalisation layer.
    """
    nonlinearity = "relu" if regression else "tanh"

    locally_directed = LocallyDirected1D(
        mask=mask,
        filters=1,
        input_shape=(mask.shape[0], 1),
        name="LocallyDirected_" + str(i),
    )
    hidden = locally_directed(model)
    hidden = K.layers.Activation(nonlinearity)(hidden)
    # Batch normalisation without learned offset (center) or scale.
    return K.layers.BatchNormalization(center=False, scale=False)(hidden)
def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain):
    """Merge the covariate input into the network when covariates are used.

    With ``num_covariates > 0`` the current output is passed through
    ``activation_layer``, concatenated with ``input_cov`` along axis 1,
    batch-normalised (no center, no scale) and fed to a single-unit dense
    layer whose bias is initialised to ``mean_ytrain``. Otherwise ``model``
    is returned unchanged.

    Args:
        model: Current output tensor of the network.
        input_cov: Input tensor holding the covariates.
        num_covariates: Number of covariates; nothing is added unless > 0.
        regression: Forwarded to ``activation_layer``.
        negative_values_ytrain: Forwarded to ``activation_layer``.
        mean_ytrain: Constant used to initialise the dense layer's bias.

    Returns:
        The (possibly extended) output tensor.
    """
    if not num_covariates > 0:
        return model

    activated = activation_layer(model, regression, negative_values_ytrain)
    merged = K.layers.concatenate([activated, input_cov], axis=1)
    normalised = K.layers.BatchNormalization(center=False, scale=False)(merged)
    bias_init = tf.keras.initializers.Constant(mean_ytrain)
    return K.layers.Dense(units=1, bias_initializer=bias_init)(normalised)
def activation_layer(model, regression, negative_values_ytrain):
    """Apply the output activation that matches the task.

    Classification (``regression`` falsy) uses "sigmoid". Regression uses
    "linear" when ``negative_values_ytrain`` is truthy and "relu" otherwise,
    and prints which of the two was chosen.

    Args:
        model: Tensor to apply the activation to.
        regression: Truthy for regression, falsy for classification.
        negative_values_ytrain: Selects "linear" over "relu" for regression.

    Returns:
        The activated tensor.
    """
    if not regression:
        return K.layers.Activation("sigmoid")(model)

    if negative_values_ytrain:
        kind, notice = "linear", 'using a linear activation function'
    else:
        kind, notice = "relu", 'using a relu activation function'

    activated = K.layers.Activation(kind)(model)
    print(notice)
    return activated
def _load_masks(datapath, mask_order):
    """Load the sparse masks from ``datapath`` and return them in layer order.

    If ``mask_order`` is non-empty, the files ``<datapath>/<name>.npz`` are
    loaded in exactly that order. Otherwise every ``*.npz`` file found in
    ``datapath`` is loaded and the masks are sorted from largest to smallest.

    Returns:
        Tuple ``(masks, shapes_x, shapes_y)``: the masks in layer order and
        two lists holding ``shape[0]`` and ``shape[1]`` of each mask.

    Raises:
        AssertionError: If sorting by rows and by columns disagrees, or if
            the column count of a mask differs from the row count of the
            next one.
    """
    # A bare ``import scipy`` does not guarantee that the ``sparse``
    # submodule is loaded, so import it explicitly.
    import scipy.sparse

    explicit_order = len(mask_order) > 0
    if explicit_order:
        masks = [scipy.sparse.load_npz(datapath + '/' + str(name) + '.npz')
                 for name in mask_order]
    else:
        masks = [scipy.sparse.load_npz(npz_path)
                 for npz_path in glob.glob(datapath + '/*.npz')]

    shapes_x = [loaded.shape[0] for loaded in masks]
    shapes_y = [loaded.shape[1] for loaded in masks]

    if not explicit_order:
        # No order given: sort by size, largest first. One pass is enough,
        # the lists are in descending order afterwards.
        order_x = np.argsort(shapes_x)[::-1]
        order_y = np.argsort(shapes_y)[::-1]
        assert np.array_equal(order_x, order_y), \
            "sorting the masks by rows and by columns gives different orders"
        masks = [masks[k] for k in order_y]
        shapes_x = [shapes_x[k] for k in order_x]
        shapes_y = [shapes_y[k] for k in order_y]

    # Each mask must produce exactly as many outputs as the next one consumes.
    for position, (n_out, n_in) in enumerate(zip(shapes_y[:-1], shapes_x[1:])):
        assert n_out == n_in, (
            "masks do not chain: mask {} has {} outputs but mask {} expects {} inputs"
            .format(position, n_out, position + 1, n_in))

    return masks, shapes_x, shapes_y


def create_network_from_npz(datapath,
                            inputsize,
                            genotype_path,
                            l1_value=0.01,
                            regression=False,
                            num_covariates=0,
                            mask_order=None):
    """Build the Keras model described by the ``.npz`` masks in ``datapath``.

    The input is reshaped to ``(inputsize, 1)`` and passed through one
    ``layer_block`` per mask. After flattening, the output layer is a
    ``LocallyDirected1D`` layer when the last mask has a single column, and
    an L1-regularised single-unit dense layer otherwise. Covariates are then
    merged by ``add_covariates`` and the final activation is applied by
    ``activation_layer``.

    Args:
        datapath: Directory containing the ``.npz`` mask files.
        inputsize: Length of the input; must equal the number of rows of
            the first mask.
        genotype_path: Not used by this function.
        l1_value: L1 penalty of the dense output layer.
        regression: Build a regression network instead of a classifier.
        num_covariates: Width of the covariate input.
        mask_order: Optional list of mask file names (without ``.npz``) in
            layer order. When omitted or empty, all masks in ``datapath``
            are used, sorted from largest to smallest.

    Returns:
        Tuple ``(model, masks)``: the Keras model and the masks in the order
        in which they were applied.

    Raises:
        AssertionError: If the masks do not chain or the first mask does not
            match ``inputsize``.
    """
    # ``None`` replaces a shared mutable ``[]`` default.
    if mask_order is None:
        mask_order = []

    print("Creating networks from npz masks")
    print("regression", regression)
    if regression:
        # NOTE(review): regression_properties is neither defined nor imported
        # in the visible part of this file - confirm where it comes from.
        mean_ytrain, negative_values_ytrain = regression_properties(datapath)
    else:
        mean_ytrain = 0
        negative_values_ytrain = False

    print(mask_order)
    masks, mask_shapes_x, mask_shapes_y = _load_masks(datapath, mask_order)

    print('mask_shapes_x[0]', mask_shapes_x[0])
    assert mask_shapes_x[0] == inputsize, (
        "first mask has {} rows but inputsize is {}".format(mask_shapes_x[0], inputsize))
    print('mask_shapes_y[-1]', mask_shapes_y[-1])

    # A last mask with a single column means no dense output layer is needed.
    all_masks_available = mask_shapes_y[-1] == 1

    input_layer = K.Input((inputsize,), name='input_layer')
    input_cov = K.Input((num_covariates,), name='inputs_cov')

    model = K.layers.Reshape(input_shape=(inputsize,), target_shape=(inputsize, 1))(input_layer)

    for i, mask in enumerate(masks):
        model = layer_block(model, mask, i, regression)

    model = K.layers.Flatten()(model)

    if all_masks_available:
        # NOTE(review): the loop above already applied every mask, including
        # masks[-1]; this applies masks[-1] a second time - confirm intended.
        model = LocallyDirected1D(mask=masks[-1], filters=1, input_shape=(masks[-1].shape[0], 1),
                                  name="output_layer")(model)
    else:
        model = K.layers.Dense(units=1, name="output_layer",
                               kernel_regularizer=tf.keras.regularizers.l1(l=l1_value)
                               )(model)

    model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain)

    output_layer = activation_layer(model, regression, negative_values_ytrain)
    model = K.Model(inputs=[input_layer, input_cov], outputs=output_layer)

    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    model.summary()

    return model, masks
def get_testdata(datapath):
    """Read the complete genotype array of the study from its HDF5 file.

    Opens ``<datapath><studyname>_genotype_processed.h5`` read-only and loads
    the whole ``/data`` node into memory. Only the genotype data is read; no
    labels are loaded.

    Args:
        datapath: Path prefix of the HDF5 file. It is concatenated with the
            file name as-is, so it must end with a path separator.

    Returns:
        The full contents of the ``data`` node.
    """
    h5_path = datapath + studyname + '_genotype_processed.h5'
    # The context manager closes the file even if reading the node fails.
    with tables.open_file(h5_path, "r") as h5file:
        return h5file.root.data[:]
def predict():
    """Run the module-level ``model`` on the test genotypes and print the result.

    Reads the globals ``datapath`` and ``model``, so it can only be called
    after both have been assigned at module level. Returns nothing.
    """
    # NOTE(review): the model is built with two inputs (genotypes and
    # covariates) but only the genotype array is passed here - confirm that
    # Keras accepts this when num_covariates is 0.
    genotypes = get_testdata(datapath)
    predictions = model.predict(genotypes)
    print('model prediction: ', predictions)
# Run configuration: data location, network input length and hyperparameters.
datapath = '/data/public/GenNet/processed_data/'
genotype_path = datapath
inputsize = 6986636
num_covariates = 0
l1_value = 0.001

# Rebuild the classification network from the two masks, in this order.
model, masks = create_network_from_npz(
    datapath=datapath,
    inputsize=inputsize,
    genotype_path=genotype_path,
    l1_value=l1_value,
    regression=False,
    num_covariates=num_covariates,
    mask_order=[
        'UKBB_sparse_connection_mask_ensmb_alligned',
        'gene_ensmbl_GTEx_mask_tstat',
    ],
)

# Restore the stored weights, then print the predictions for the test data.
model.load_weights(datapath + 'bestweight_job_hypertension.h5')
print('weights have been loaded')

predict()
|