File size: 6,827 Bytes
75c583d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
os.chdir("/data/public/GenNet")
import sys
import glob
import numpy as np
import pandas as pd
#sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import matplotlib
matplotlib.use('agg')
import tensorflow as tf
import tensorflow.keras as K
import scipy
import tables

tf.keras.backend.set_epsilon(0.0000001)
tf_version = tf.__version__


def _version_tuple(version, width=3):
    """Convert a dotted version string into a tuple of ints for comparison.

    Only the leading digits of each dot-separated field are used, so
    suffixes such as ``-rc1`` are ignored. The result is zero-padded to
    ``width`` fields so that e.g. ``'2.0'`` and ``'2.0.0'`` compare equal.
    """
    fields = []
    for field in str(version).split('.')[:width]:
        digits = ''
        for char in field:
            if not char.isdigit():
                break
            digits += char
        fields.append(int(digits) if digits else 0)
    fields.extend([0] * (width - len(fields)))
    return tuple(fields)


# Compare versions numerically: comparing the raw strings is lexicographic,
# so e.g. '1.9.0' would sort *after* '1.13.1' and pick the wrong branch.
_tf_version_tuple = _version_tuple(tf_version)
if _tf_version_tuple <= (1, 13, 1):
    from GenNet_utils.LocallyDirectedConnected import LocallyDirected1D
    print('= or less then 1.13.1: tensorflow version is', tf_version)
elif _tf_version_tuple >= (2, 0, 0):
    from GenNet_utils.LocallyDirectedConnected_tf2 import LocallyDirected1D
    print('= or more then 2.0: tensorflow version is', tf_version)
else:
    # Versions strictly between 1.13.1 and 2.0 fall back to the TF2 layer.
    print("unexpected tensorflow version")
    from GenNet_utils.LocallyDirectedConnected_tf2 import LocallyDirected1D

# Study name; used by get_testdata to build the genotype file name.
studyname = 'test_GTEx'

def layer_block(model, mask, i, regression):
    """Append one mask-driven layer, an activation and a batch norm.

    Args:
        model: Keras tensor the new layers are attached to.
        mask: connectivity mask handed to ``LocallyDirected1D``; its first
            dimension is used as the layer's input length.
        i: index of the layer, used to build the layer name.
        regression: if true the block uses a "relu" activation,
            otherwise "tanh".

    Returns:
        The output tensor of the batch-normalisation layer.
    """
    activation_type = "relu" if regression else "tanh"

    locally_directed = LocallyDirected1D(
        mask=mask,
        filters=1,
        input_shape=(mask.shape[0], 1),
        name="LocallyDirected_" + str(i),
    )
    connected = locally_directed(model)
    activated = K.layers.Activation(activation_type)(connected)
    # Normalise without a learned offset or scale.
    return K.layers.BatchNormalization(center=False, scale=False)(activated)


def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain):
    """Merge the covariate input into the network when covariates are used.

    With ``num_covariates`` greater than zero the current output is passed
    through ``activation_layer``, concatenated with ``input_cov`` along
    axis 1, batch-normalised and reduced to a single unit by a dense layer
    whose bias starts at ``mean_ytrain``. Otherwise ``model`` is returned
    unchanged.
    """
    if not num_covariates > 0:
        return model

    activated = activation_layer(model, regression, negative_values_ytrain)
    merged = K.layers.concatenate([activated, input_cov], axis=1)
    normalised = K.layers.BatchNormalization(center=False, scale=False)(merged)
    bias_start = tf.keras.initializers.Constant(mean_ytrain)
    return K.layers.Dense(units=1, bias_initializer=bias_start)(normalised)


def activation_layer(model, regression, negative_values_ytrain):
    """Attach the output activation that fits the task.

    Classification (``regression`` false) uses "sigmoid". Regression uses
    "linear" when ``negative_values_ytrain`` is true and "relu" otherwise,
    and reports the choice on stdout.

    Returns:
        The output tensor of the added activation layer.
    """
    if not regression:
        return K.layers.Activation("sigmoid")(model)

    if negative_values_ytrain:
        activation_name = "linear"
        message = 'using a linear activation function'
    else:
        activation_name = "relu"
        message = 'using a relu activation function'

    model = K.layers.Activation(activation_name)(model)
    print(message)
    return model

def _load_masks(datapath, mask_order):
    """Load the sparse masks from ``datapath`` and return them in layer order.

    If ``mask_order`` is non-empty, ``<datapath>/<name>.npz`` is loaded for
    each name, in the given order. Otherwise every ``*.npz`` file in
    ``datapath`` is loaded and the masks are ordered from largest to
    smallest shape.

    Raises:
        ValueError: if no mask could be found.
        AssertionError: if the masks cannot be chained (the number of
            columns of one mask must equal the number of rows of the next).
    """
    # `import scipy` alone does not guarantee that the sparse submodule is loaded.
    import scipy.sparse

    if len(mask_order) > 0:
        masks = [scipy.sparse.load_npz(datapath + '/' + str(mask_name) + '.npz')
                 for mask_name in mask_order]
    else:
        masks = [scipy.sparse.load_npz(npz_path)
                 for npz_path in glob.glob(datapath + '/*.npz')]
        # One stable sort on (rows, columns), largest first, is enough;
        # glob returns the files in arbitrary order.
        masks.sort(key=lambda mask: tuple(mask.shape), reverse=True)
        output_sizes = [mask.shape[1] for mask in masks]
        # Both dimensions must shrink in the same order.
        assert output_sizes == sorted(output_sizes, reverse=True), \
            "mask rows and columns do not sort in the same order"

    if not masks:
        raise ValueError("no .npz masks found in " + str(datapath))

    for upper, lower in zip(masks, masks[1:]):  # check that the masks fit each other
        assert upper.shape[1] == lower.shape[0], \
            "consecutive masks do not fit: " + str(upper.shape) + " -> " + str(lower.shape)

    return masks


def create_network_from_npz(datapath,
                            inputsize,
                            genotype_path,
                            l1_value=0.01,
                            regression=False,
                            num_covariates=0,
                            mask_order=None):
    """Build the Keras model described by the ``.npz`` masks in ``datapath``.

    Args:
        datapath: directory containing the ``.npz`` mask files.
        inputsize: length of the genotype input vector; must equal the
            number of rows of the first mask.
        genotype_path: not used by this function.
        l1_value: L1 regularisation strength of the dense output layer
            (only used when the last mask does not end in a single node).
        regression: build a regression network instead of a classifier.
        num_covariates: width of the covariate input.
        mask_order: optional list of mask file names (without ``.npz``)
            giving the layer order. When empty or ``None`` the masks are
            ordered by size.

    Returns:
        A tuple ``(model, masks)`` with the Keras model and the list of
        masks in the order they were applied.

    Raises:
        ValueError: if no mask is found.
        AssertionError: if the masks do not fit each other or ``inputsize``.
    """
    if mask_order is None:  # avoid a shared mutable default argument
        mask_order = []

    print("Creating networks from npz masks")
    print("regression", regression)
    if regression:
        # NOTE(review): regression_properties is not defined or imported in the
        # visible part of this file - confirm it is available before using
        # regression=True.
        mean_ytrain, negative_values_ytrain = regression_properties(datapath)
    else:
        mean_ytrain = 0
        negative_values_ytrain = False

    print(mask_order)
    masks = _load_masks(datapath, mask_order)

    first_input_size = masks[0].shape[0]
    last_output_size = masks[-1].shape[1]
    print('mask_shapes_x[0]', first_input_size)
    assert first_input_size == inputsize, \
        "first mask expects " + str(first_input_size) + " inputs, got inputsize=" + str(inputsize)
    print('mask_shapes_y[-1]', last_output_size)
    # If the last mask already ends in one node no dense output layer is used.
    all_masks_available = last_output_size == 1

    input_layer = K.Input((inputsize,), name='input_layer')
    input_cov = K.Input((num_covariates,), name='inputs_cov')

    model = K.layers.Reshape(input_shape=(inputsize,), target_shape=(inputsize, 1))(input_layer)

    for i, mask in enumerate(masks):
        model = layer_block(model, mask, i, regression)

    model = K.layers.Flatten()(model)

    if all_masks_available:
        # NOTE(review): the loop above already added a layer for every mask,
        # so masks[-1] is applied a second time here - confirm this is intended.
        model = LocallyDirected1D(mask=masks[-1], filters=1, input_shape=(masks[-1].shape[0], 1),
                                  name="output_layer")(model)
    else:
        model = K.layers.Dense(units=1, name="output_layer",
                               kernel_regularizer=tf.keras.regularizers.l1(l=l1_value)
                               )(model)

    model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain)

    output_layer = activation_layer(model, regression, negative_values_ytrain)
    model = K.Model(inputs=[input_layer, input_cov], outputs=output_layer)

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    return model, masks

def get_testdata(datapath):
    """Read the complete processed genotype matrix of the current study.

    Opens ``<datapath><studyname>_genotype_processed.h5`` read-only and
    returns the full contents of its ``/data`` node. ``datapath`` is
    concatenated directly with the file name, so it has to end with a path
    separator. Only the genotype data is returned; no labels are loaded.

    Args:
        datapath: directory prefix of the processed genotype file.

    Returns:
        The array read from ``h5file.root.data``.
    """
    # The context manager closes the file even if reading the node fails.
    with tables.open_file(datapath + studyname + '_genotype_processed.h5', "r") as h5file:
        xbatch = h5file.root.data[:]
    return xbatch

def predict():
    """Run the loaded model on the test genotypes and print the predictions.

    Uses the module-level ``datapath``, ``model`` and ``num_covariates``.

    Returns:
        The array of predictions returned by ``model.predict``.
    """
    xtest = get_testdata(datapath)
    # The model is built with two inputs (genotypes and covariates), so
    # predict() needs one array per input. No covariate values are loaded
    # here; a zero-filled array of the matching width is passed instead.
    # NOTE(review): with num_covariates > 0 real covariates should be supplied.
    xcov = np.zeros((xtest.shape[0], num_covariates))
    pred = model.predict([xtest, xcov])
    print('model prediction: ', pred)
    return pred

# Locations and hyper-parameters for this run.
datapath = '/data/public/GenNet/processed_data/'
genotype_path = datapath
inputsize = 6986636  # checked against the row count of the first mask
num_covariates = 0
l1_value = 0.001

# Build the classification network from the two masks, in this order.
model, masks = create_network_from_npz(
    datapath=datapath,
    inputsize=inputsize,
    genotype_path=genotype_path,
    l1_value=l1_value,
    regression=False,
    num_covariates=num_covariates,
    mask_order=[
        'UKBB_sparse_connection_mask_ensmb_alligned',
        'gene_ensmbl_GTEx_mask_tstat',
    ],
)

# Restore the stored weights, then predict on the test data.
model.load_weights(datapath + 'bestweight_job_hypertension.h5')
print('weights have been loaded')
predict()