Upload 10 files
add inference code and the trained model file.
- checkpoints/model.pt +3 -0
- inference.py +125 -0
- models/__init__.py +0 -0
- models/attention.py +128 -0
- models/block.py +120 -0
- models/configs.py +33 -0
- models/embed.py +97 -0
- models/encoder.py +56 -0
- models/mlp.py +56 -0
- models/modeling.py +57 -0
checkpoints/model.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1e7ccea2005799786c18f5adc27b1e6586fc614fb6bf8e56092a44bc23adf398
size 101028923
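checkpoints/model.pt is a Git LFS pointer, not the weights themselves; the real ~101 MB checkpoint has to be fetched with git lfs pull. A minimal sketch (our own helper, not part of the upload) of checking a downloaded file against the oid and size recorded above:

import hashlib
import os

def verify_lfs_object(path, expected_oid, expected_size):
    # Compare on-disk size and SHA-256 with the values from the LFS pointer.
    assert os.path.getsize(path) == expected_size, "size mismatch"
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == expected_oid, "sha256 mismatch"

verify_lfs_object(
    "checkpoints/model.pt",
    "1e7ccea2005799786c18f5adc27b1e6586fc614fb6bf8e56092a44bc23adf398",
    101028923,
)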
inference.py
ADDED
@@ -0,0 +1,125 @@
from __future__ import print_function, division
import os
import sys
import time
import argparse
import warnings
import torch
import pickle
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import transforms, utils
from models.modeling import PATHOLOGICAL_CLASSFIER, CONFIGS

device = "cuda" if torch.cuda.is_available() else "cpu"


def load_weights(model, weight_path):
    """Load a trained checkpoint; weights are stored under "model_state_dict"."""
    print("Loading PATHOLOGICAL_CLASSFIER...", weight_path)
    loadnet = torch.load(weight_path, map_location=device)
    model.load_state_dict(loadnet["model_state_dict"], strict=True)
    return model


class MyDataset(Dataset):
    """Pairs pre-extracted image/text feature pickles with their OS/DFS targets."""

    def __init__(self, root_path):
        m_data = []
        img_pkl_file_path = os.path.join(root_path, "img_feature")
        txt_pkl_file_path = os.path.join(root_path, "txt_feature")
        target_pkl_file_path = os.path.join(root_path, "target")
        for file in os.listdir(img_pkl_file_path):
            img_pkl_file = os.path.join(img_pkl_file_path, file)
            txt_pkl_file = os.path.join(txt_pkl_file_path, file)
            target_pkl_file = os.path.join(target_pkl_file_path, file)
            with open(img_pkl_file, "rb") as img_f:
                img_load_dict = pickle.load(img_f)
                m_input_img = img_load_dict["img_feature"]
            with open(txt_pkl_file, "rb") as txt_f:
                txt_load_dict = pickle.load(txt_f)
                m_input_txt = txt_load_dict["txt_feature"]
            with open(target_pkl_file, "rb") as target_f:
                target_load_dict = pickle.load(target_f)
                m_output_os = target_load_dict["target_os"]
                m_output_dfs = target_load_dict["target_dfs"]
            m_data.append((m_input_img, m_input_txt, m_output_os, m_output_dfs, file))
        self.m_data = m_data

    def __getitem__(self, idx):
        inp_i, inp_txt, oup_os, oup_dfs, f_name = self.m_data[idx]
        return inp_i, inp_txt, oup_os, oup_dfs, f_name

    def __len__(self):
        return len(self.m_data)


def valid(args):
    torch.manual_seed(0)
    num_classes = 2
    config = CONFIGS["PATHOLOGICAL_CLASSFIER"]
    model = PATHOLOGICAL_CLASSFIER(config, num_classes=num_classes, vis=True, mm=True)

    model_path = '/your/trained/model/path/'
    p_c_model = load_weights(model, model_path)

    p_c_model.to(device)
    test_dataset = MyDataset("/your/dataset/path/")
    test_loader = DataLoader(test_dataset, batch_size=1)

    # ----- Test ------
    print("--------Start testing-------")
    p_c_model.eval()

    valid_1_total = 0
    valid_1_cnt = 0

    valid_2_total = 0
    valid_2_cnt = 0
    valid_total_cnt = 0

    with torch.no_grad():
        for imgs, txt, target_1, target_2, file_name in test_loader:
            output_1, output_2 = p_c_model(imgs.to(device), txt.to(device))

            # Per-class probabilities (computed for inspection/export).
            out_1_list_prob = torch.softmax(output_1.squeeze(1), dim=-1).cpu().numpy().tolist()

            out_1_list = torch.argmax(output_1.squeeze(1), dim=-1).cpu().numpy().tolist()
            target_1_list = target_1.tolist()

            out_2_list = torch.argmax(output_2.squeeze(1), dim=-1).cpu().numpy().tolist()
            target_2_list = target_2.tolist()

            valid_1_total += len(out_1_list)
            valid_2_total += len(out_2_list)

            for i in range(len(out_1_list)):
                if out_1_list[i] == target_1_list[i]:
                    valid_1_cnt += 1
                if out_2_list[i] == target_2_list[i]:
                    valid_2_cnt += 1
                if out_1_list[i] == target_1_list[i] and out_2_list[i] == target_2_list[i]:
                    valid_total_cnt += 1

    valid_1_acc = valid_1_cnt / valid_1_total
    valid_2_acc = valid_2_cnt / valid_2_total
    valid_total_acc = valid_total_cnt / valid_1_total

    print(valid_1_acc, valid_1_total, valid_2_acc, valid_2_total, valid_total_acc, valid_total_cnt)
    print("=" * 100)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="")
    args = parser.parse_args()
    valid(args)
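MyDataset above expects three sibling folders under the dataset root (img_feature/, txt_feature/, target/), each holding one pickle per case with the same file name and the keys read in __init__. A minimal sketch that writes a single dummy case in that layout; the feature shapes here are our assumptions, not dictated by inference.py:

import os
import pickle
import torch

root = "/your/dataset/path/"
for sub in ("img_feature", "txt_feature", "target"):
    os.makedirs(os.path.join(root, sub), exist_ok=True)

name = "case_0001.pkl"
# Illustrative shapes: (tokens, hidden) features already projected to 512-d.
with open(os.path.join(root, "img_feature", name), "wb") as f:
    pickle.dump({"img_feature": torch.randn(196, 512)}, f)
with open(os.path.join(root, "txt_feature", name), "wb") as f:
    pickle.dump({"txt_feature": torch.randn(40, 512)}, f)
with open(os.path.join(root, "target", name), "wb") as f:
    pickle.dump({"target_os": 1, "target_dfs": 0}, f)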
models/__init__.py
ADDED
File without changes
models/attention.py
ADDED
@@ -0,0 +1,128 @@
import torch
import torch.nn as nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
import models.configs as configs
import math


class Attention(nn.Module):
    def __init__(self, config, vis, mm=True):
        super(Attention, self).__init__()
        self.vis = vis
        self.num_attention_heads = config.transformer["num_heads"]
        self.attention_head_size = int(config.hidden_size / self.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = Linear(config.hidden_size, self.all_head_size)
        self.key = Linear(config.hidden_size, self.all_head_size)
        self.value = Linear(config.hidden_size, self.all_head_size)

        if mm:
            self.query_text = Linear(config.hidden_size, self.all_head_size)
            self.key_text = Linear(config.hidden_size, self.all_head_size)
            self.value_text = Linear(config.hidden_size, self.all_head_size)
            self.out_text = Linear(config.hidden_size, config.hidden_size)
            self.attn_dropout_text = Dropout(config.transformer["attention_dropout_rate"])
            self.attn_dropout_it = Dropout(config.transformer["attention_dropout_rate"])
            self.attn_dropout_ti = Dropout(config.transformer["attention_dropout_rate"])
            self.proj_dropout_text = Dropout(config.transformer["attention_dropout_rate"])

        self.out = Linear(config.hidden_size, config.hidden_size)
        self.attn_dropout = Dropout(config.transformer["attention_dropout_rate"])
        self.proj_dropout = Dropout(config.transformer["attention_dropout_rate"])

        self.softmax = Softmax(dim=-1)

    def transpose_for_scores(self, x):
        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def forward(self, hidden_states, text=None):
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)
        if text is not None:
            text_q = self.query_text(text)
            text_k = self.key_text(text)
            text_v = self.value_text(text)
        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)
        if text is not None:
            query_layer_img = query_layer
            key_layer_img = key_layer
            value_layer_img = value_layer
            query_layer_text = self.transpose_for_scores(text_q)
            key_layer_text = self.transpose_for_scores(text_k)
            value_layer_text = self.transpose_for_scores(text_v)

        if text is None:
            # Single-modality scaled dot-product self-attention.
            attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
            attention_scores = attention_scores / math.sqrt(self.attention_head_size)
            attention_probs = self.softmax(attention_scores)
            weights = attention_probs if self.vis else None
            attention_probs = self.attn_dropout(attention_probs)

            context_layer = torch.matmul(attention_probs, value_layer)
            context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
            new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
            context_layer = context_layer.view(*new_context_layer_shape)
            attention_output = self.out(context_layer)
            attention_output = self.proj_dropout(attention_output)

            return attention_output, None, weights

        else:
            # Bidirectional multimodal attention: image->image, text->text,
            # image->text (it) and text->image (ti).
            attention_scores_img = torch.matmul(query_layer, key_layer.transpose(-1, -2))
            attention_scores_text = torch.matmul(query_layer_text, key_layer_text.transpose(-1, -2))
            attention_scores_it = torch.matmul(query_layer_img, key_layer_text.transpose(-1, -2))
            attention_scores_ti = torch.matmul(query_layer_text, key_layer_img.transpose(-1, -2))
            attention_scores_img = attention_scores_img / math.sqrt(self.attention_head_size)

            attention_probs_img = self.softmax(attention_scores_img)
            weights_img = attention_probs_img if self.vis else None

            attention_probs_img = self.attn_dropout(attention_probs_img)

            attention_scores_text = attention_scores_text / math.sqrt(self.attention_head_size)
            attention_probs_text = self.softmax(attention_scores_text)

            text_per_weights = attention_probs_text.mean(dim=-1)
            text_per_weights = self.softmax(text_per_weights)

            weights_text = attention_probs_text if self.vis else None

            attention_probs_text = self.attn_dropout_text(attention_probs_text)

            attention_scores_it = attention_scores_it / math.sqrt(self.attention_head_size)
            attention_probs_it = self.softmax(attention_scores_it)
            attention_probs_it = self.attn_dropout_it(attention_probs_it)

            attention_scores_ti = attention_scores_ti / math.sqrt(self.attention_head_size)
            attention_probs_ti = self.softmax(attention_scores_ti)
            attention_probs_ti = self.attn_dropout_ti(attention_probs_ti)

            context_layer_img = torch.matmul(attention_probs_img, value_layer_img)
            context_layer_img = context_layer_img.permute(0, 2, 1, 3).contiguous()
            context_layer_text = torch.matmul(attention_probs_text, value_layer_text)
            context_layer_text = context_layer_text.permute(0, 2, 1, 3).contiguous()
            context_layer_it = torch.matmul(attention_probs_it, value_layer_text)
            context_layer_it = context_layer_it.permute(0, 2, 1, 3).contiguous()
            context_layer_ti = torch.matmul(attention_probs_ti, value_layer_img)
            context_layer_ti = context_layer_ti.permute(0, 2, 1, 3).contiguous()
            new_context_layer_shape = context_layer_img.size()[:-2] + (self.all_head_size,)
            context_layer_img = context_layer_img.view(*new_context_layer_shape)
            new_context_layer_shape = context_layer_text.size()[:-2] + (self.all_head_size,)
            context_layer_text = context_layer_text.view(*new_context_layer_shape)
            new_context_layer_shape = context_layer_it.size()[:-2] + (self.all_head_size,)
            context_layer_it = context_layer_it.view(*new_context_layer_shape)
            new_context_layer_shape = context_layer_ti.size()[:-2] + (self.all_head_size,)
            context_layer_ti = context_layer_ti.view(*new_context_layer_shape)
            # Average the self- and cross-attention contexts of each modality
            # before the output projection.
            attention_output_img = self.out((context_layer_img + context_layer_it) / 2)
            attention_output_text = self.out((context_layer_text + context_layer_ti) / 2)
            attention_output_img = self.proj_dropout(attention_output_img)
            attention_output_text = self.proj_dropout_text(attention_output_text)

            # Return the attention weights as a third value so callers can
            # unpack (output_img, output_text, weights) in both branches.
            return attention_output_img, attention_output_text, weights_img
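A minimal sketch of the two forward modes of Attention, built with the repo's own config (hidden_size 512, a single head); the token counts are illustrative:

import torch
from models.attention import Attention
from models.configs import get_IRENE_config

config = get_IRENE_config()                 # hidden_size=512, num_heads=1
attn = Attention(config, vis=True, mm=True).eval()

img = torch.randn(2, 197, 512)              # image tokens
txt = torch.randn(2, 40, 512)               # text tokens

# Image-only: plain self-attention.
out, _, weights = attn(img)                 # out: (2, 197, 512), weights: (2, 1, 197, 197)

# Image + text: per-modality self-attention plus image<->text cross-attention,
# averaged before the output projection.
out_img, out_txt, w_img = attn(img, txt)    # (2, 197, 512) and (2, 40, 512)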
models/block.py
ADDED
@@ -0,0 +1,120 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import logging
import math

from os.path import join as pjoin

import torch
import torch.nn as nn
import numpy as np

from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
from torch.nn.modules.utils import _pair
from scipy import ndimage

import models.configs as configs
from models.attention import Attention
from models.embed import Embeddings
from models.mlp import Mlp

ATTENTION_Q = "MultiHeadDotProductAttention_1/query"
ATTENTION_K = "MultiHeadDotProductAttention_1/key"
ATTENTION_V = "MultiHeadDotProductAttention_1/value"
ATTENTION_OUT = "MultiHeadDotProductAttention_1/out"
FC_0 = "MlpBlock_3/Dense_0"
FC_1 = "MlpBlock_3/Dense_1"
ATTENTION_NORM = "LayerNorm_0"
MLP_NORM = "LayerNorm_2"


def np2th(weights, conv=False):
    """Convert numpy weights from the original ViT checkpoints to torch tensors
    (required by load_from below); HWIO kernels become OIHW when conv=True."""
    if conv:
        weights = weights.transpose([3, 2, 0, 1])
    return torch.from_numpy(weights)


class Block(nn.Module):
    def __init__(self, config, vis, mm=True):
        super(Block, self).__init__()
        self.hidden_size = config.hidden_size
        self.attention_norm = LayerNorm(config.hidden_size, eps=1e-6)
        self.ffn_norm = LayerNorm(config.hidden_size, eps=1e-6)
        if mm:
            self.att_norm_text = LayerNorm(config.hidden_size, eps=1e-6)
            self.ffn_norm_text = LayerNorm(config.hidden_size, eps=1e-6)
            self.ffn_text = Mlp(config)

        self.ffn = Mlp(config)
        self.attn = Attention(config, vis, mm)

    def forward(self, x, text=None):
        if text is None:
            # Pre-norm residual self-attention followed by a pre-norm residual MLP.
            h = x
            x = self.attention_norm(x)
            x, text, weights = self.attn(x)

            x = x + h

            h = x
            x = self.ffn_norm(x)
            x = self.ffn(x)
            x = x + h
            return x, text, weights
        else:
            # Multimodal path: both streams are normalized, cross-attended and
            # passed through their own MLPs, each with residual connections.
            h = x
            h_text = text
            x = self.attention_norm(x)
            text = self.att_norm_text(text)

            x, text, weights_img = self.attn(x, text)

            x = x + h
            text = text + h_text

            h = x
            h_text = text
            x = self.ffn_norm(x)
            text = self.ffn_norm_text(text)
            x = self.ffn(x)
            text = self.ffn_text(text)
            x = x + h
            text = text + h_text

            return x, text, weights_img

    def load_from(self, weights, n_block):
        """Copy pretrained ViT weights (a numpy dict) into this block."""
        ROOT = f"Transformer/encoderblock_{n_block}"
        with torch.no_grad():
            query_weight = np2th(weights[pjoin(ROOT, ATTENTION_Q, "kernel")]).view(self.hidden_size, self.hidden_size).t()
            key_weight = np2th(weights[pjoin(ROOT, ATTENTION_K, "kernel")]).view(self.hidden_size, self.hidden_size).t()
            value_weight = np2th(weights[pjoin(ROOT, ATTENTION_V, "kernel")]).view(self.hidden_size, self.hidden_size).t()
            out_weight = np2th(weights[pjoin(ROOT, ATTENTION_OUT, "kernel")]).view(self.hidden_size, self.hidden_size).t()

            query_bias = np2th(weights[pjoin(ROOT, ATTENTION_Q, "bias")]).view(-1)
            key_bias = np2th(weights[pjoin(ROOT, ATTENTION_K, "bias")]).view(-1)
            value_bias = np2th(weights[pjoin(ROOT, ATTENTION_V, "bias")]).view(-1)
            out_bias = np2th(weights[pjoin(ROOT, ATTENTION_OUT, "bias")]).view(-1)

            self.attn.query.weight.copy_(query_weight)
            self.attn.key.weight.copy_(key_weight)
            self.attn.value.weight.copy_(value_weight)
            self.attn.out.weight.copy_(out_weight)
            self.attn.query.bias.copy_(query_bias)
            self.attn.key.bias.copy_(key_bias)
            self.attn.value.bias.copy_(value_bias)
            self.attn.out.bias.copy_(out_bias)

            mlp_weight_0 = np2th(weights[pjoin(ROOT, FC_0, "kernel")]).t()
            mlp_weight_1 = np2th(weights[pjoin(ROOT, FC_1, "kernel")]).t()
            mlp_bias_0 = np2th(weights[pjoin(ROOT, FC_0, "bias")]).t()
            mlp_bias_1 = np2th(weights[pjoin(ROOT, FC_1, "bias")]).t()

            self.ffn.fc1.weight.copy_(mlp_weight_0)
            self.ffn.fc2.weight.copy_(mlp_weight_1)
            self.ffn.fc1.bias.copy_(mlp_bias_0)
            self.ffn.fc2.bias.copy_(mlp_bias_1)

            self.attention_norm.weight.copy_(np2th(weights[pjoin(ROOT, ATTENTION_NORM, "scale")]))
            self.attention_norm.bias.copy_(np2th(weights[pjoin(ROOT, ATTENTION_NORM, "bias")]))
            self.ffn_norm.weight.copy_(np2th(weights[pjoin(ROOT, MLP_NORM, "scale")]))
            self.ffn_norm.bias.copy_(np2th(weights[pjoin(ROOT, MLP_NORM, "bias")]))
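A minimal usage sketch of Block in both modes — a pre-norm residual attention step followed by a pre-norm residual MLP, with a separate norm/MLP pair for the text stream when mm=True; token counts are again illustrative:

import torch
from models.block import Block
from models.configs import get_IRENE_config

config = get_IRENE_config()
block = Block(config, vis=True, mm=True).eval()

img = torch.randn(2, 197, 512)
txt = torch.randn(2, 40, 512)

x, _, w = block(img)          # image-only path: (2, 197, 512)
x, t, w = block(img, txt)     # multimodal path: both streams are updated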
models/configs.py
ADDED
@@ -0,0 +1,33 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ml_collections

def get_IRENE_config():
    """Returns the PATHOLOGICAL_CLASSFIER configuration."""
    config = ml_collections.ConfigDict()
    config.patches = ml_collections.ConfigDict({'size': (16, 16)})
    config.hidden_size = 512
    config.transformer = ml_collections.ConfigDict()
    config.transformer.mlp_dim = 1024
    config.transformer.num_heads = 1  # must divide hidden_size evenly
    config.transformer.num_layers = 4  # 4 for the follow-up models trained at the other three hospitals; TCGA used 2
    # config.transformer.num_layers = 2  # 4 for the follow-up models trained at the other three hospitals; TCGA used 2
    config.transformer.attention_dropout_rate = 0.2  # 0.0 - 0.2
    config.transformer.dropout_rate = 0.3  # 0.1 - 0.3
    config.classifier = 'token'
    config.representation_size = None
    config.cc_len = 40
    config.lab_len = 92
    return config
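Since the config is a plain ml_collections.ConfigDict, individual fields can be overridden after construction, e.g. the 2-layer TCGA variant mentioned in the comments:

from models.configs import get_IRENE_config

config = get_IRENE_config()
config.transformer.num_layers = 2    # TCGA variant noted in the comment above
config.transformer.num_heads = 8     # any value that evenly divides hidden_size (512)
assert config.hidden_size % config.transformer.num_heads == 0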
models/embed.py
ADDED
@@ -0,0 +1,97 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import logging
import math

from os.path import join as pjoin

import torch
import torch.nn as nn
import numpy as np

from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
from torch.nn.modules.utils import _pair
from scipy import ndimage

import models.configs as configs
from models.attention import Attention
import pdb


class Embeddings(nn.Module):
    """Construct the embeddings from patch and position embeddings."""

    def __init__(self, config, img_size, in_channels=3):
        super(Embeddings, self).__init__()
        self.hybrid = None
        img_size = _pair(img_size)
        tk_lim = config.cc_len
        num_lab = config.lab_len

        if config.patches.get("grid") is not None:
            # Hybrid (CNN backbone) variant; note that self.hybrid_model is not
            # defined in this repo, so this path is unused with the default config.
            grid_size = config.patches["grid"]
            patch_size = (img_size[0] // 16 // grid_size[0], img_size[1] // 16 // grid_size[1])
            n_patches = (img_size[0] // 16) * (img_size[1] // 16)
            self.hybrid = True
        else:
            patch_size = _pair(config.patches["size"])
            n_patches = (img_size[0] // patch_size[0]) * (img_size[1] // patch_size[1])
            self.hybrid = False

        self.patch_embeddings = Conv2d(in_channels=in_channels,
                                       out_channels=config.hidden_size,
                                       kernel_size=patch_size,
                                       stride=patch_size)
        self.cc_embeddings = Linear(768, config.hidden_size)
        self.lab_embeddings = Linear(1, config.hidden_size)
        self.sex_embeddings = Linear(1, config.hidden_size)
        self.age_embeddings = Linear(1, config.hidden_size)

        self.position_embeddings = nn.Parameter(torch.zeros(1, 1 + n_patches, config.hidden_size))
        self.pe_txt = nn.Parameter(torch.zeros(1, tk_lim, config.hidden_size))
        self.pe_lab = nn.Parameter(torch.zeros(1, num_lab, config.hidden_size))
        self.pe_sex = nn.Parameter(torch.zeros(1, 1, config.hidden_size))
        self.pe_age = nn.Parameter(torch.zeros(1, 1, config.hidden_size))

        self.cls_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size))

        self.dropout = Dropout(config.transformer["dropout_rate"])
        self.dropout_txt = Dropout(config.transformer["dropout_rate"])
        self.dropout_lab = Dropout(config.transformer["dropout_rate"])
        self.dropout_sex = Dropout(config.transformer["dropout_rate"])
        self.dropout_age = Dropout(config.transformer["dropout_rate"])

    def forward(self, x, txt, lab, sex, age):
        B = x.shape[0]
        cls_tokens = self.cls_token.expand(B, -1, -1)

        if self.hybrid:
            x = self.hybrid_model(x)
        x = self.patch_embeddings(x)  # 16x16 patches --> CNN --> 1x1 tokens
        txt = self.cc_embeddings(txt)
        lab = self.lab_embeddings(lab)
        sex = self.sex_embeddings(sex)
        age = self.age_embeddings(age)

        x = x.flatten(2)
        x = x.transpose(-1, -2)
        x = torch.cat((cls_tokens, x), dim=1)

        embeddings = x + self.position_embeddings
        cc_embeddings = txt + self.pe_txt
        lab_embeddings = lab + self.pe_lab
        sex_embeddings = sex + self.pe_sex
        age_embeddings = age + self.pe_age

        embeddings = self.dropout(embeddings)
        cc_embeddings = self.dropout_txt(cc_embeddings)
        lab_embeddings = self.dropout_lab(lab_embeddings)
        sex_embeddings = self.dropout_sex(sex_embeddings)
        age_embeddings = self.dropout_age(age_embeddings)
        return embeddings, cc_embeddings, lab_embeddings, sex_embeddings, age_embeddings
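With the default 16x16 patches and a 224x224 input, Embeddings yields 224/16 * 224/16 = 196 patch tokens plus one CLS token, next to 40 clinical-text tokens, 92 lab tokens and one token each for sex and age. Note that modeling.Transformer below feeds pre-extracted features straight into the Encoder and does not use this module. A minimal standalone sketch; the dummy input shapes follow from the layer definitions above:

import torch
from models.embed import Embeddings
from models.configs import get_IRENE_config

config = get_IRENE_config()
emb = Embeddings(config, img_size=224).eval()

x   = torch.randn(2, 3, 224, 224)   # raw image
txt = torch.randn(2, 40, 768)       # cc_len tokens of 768-d text features
lab = torch.randn(2, 92, 1)         # lab_len scalar lab values
sex = torch.randn(2, 1, 1)
age = torch.randn(2, 1, 1)

img_e, cc_e, lab_e, sex_e, age_e = emb(x, txt, lab, sex, age)
# img_e: (2, 197, 512); cc_e: (2, 40, 512); lab_e: (2, 92, 512); sex_e/age_e: (2, 1, 512)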
models/encoder.py
ADDED
@@ -0,0 +1,56 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import logging
import math

from os.path import join as pjoin

import torch
import torch.nn as nn
import numpy as np

from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
from torch.nn.modules.utils import _pair
from scipy import ndimage

import models.configs as configs
from models.attention import Attention
from models.embed import Embeddings
from models.mlp import Mlp
from models.block import Block


class Encoder(nn.Module):
    def __init__(self, config, vis, mm):
        super(Encoder, self).__init__()
        self.vis = vis
        self.layer = nn.ModuleList()
        self.encoder_norm = LayerNorm(config.hidden_size, eps=1e-6)
        for i in range(config.transformer["num_layers"]):
            # The first two blocks are multimodal (image + text); the remaining
            # blocks operate on the fused representation only.
            if i < 2:
                layer = Block(config, vis, mm)
            else:
                layer = Block(config, vis, mm=False)
            self.layer.append(copy.deepcopy(layer))
        self.img_adaptive_avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 512))
        self.txt_adaptive_avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 512))

    def forward(self, hidden_states, text=None):
        for (i, layer_block) in enumerate(self.layer):
            if i == 2:
                if text is not None:
                    # Pool each modality to a single token and concatenate them
                    # before the remaining unimodal blocks.
                    hidden_states = self.img_adaptive_avg_pool(hidden_states)
                    text = self.txt_adaptive_avg_pool(text)
                    hidden_states = torch.cat((hidden_states, text), 1)
                    hidden_states, text, weights = layer_block(hidden_states)
                else:
                    hidden_states, text, weights = layer_block(hidden_states)
            elif i < 2:
                hidden_states, text, weights = layer_block(hidden_states, text)
            else:
                hidden_states, text, weights = layer_block(hidden_states)
        encoded = self.encoder_norm(hidden_states)
        return encoded
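The Encoder runs the first two blocks as joint image/text blocks, then pools each stream to a single 512-d token, concatenates the two tokens, and runs the remaining blocks as plain self-attention over that fused pair. A minimal sketch of the shape flow (token counts illustrative):

import torch
from models.encoder import Encoder
from models.configs import get_IRENE_config

config = get_IRENE_config()                 # num_layers = 4: 2 multimodal + 2 fused
enc = Encoder(config, vis=True, mm=True).eval()

img = torch.randn(2, 197, 512)
txt = torch.randn(2, 40, 512)

encoded = enc(img, txt)
# Blocks 0-1: (2, 197, 512) and (2, 40, 512) updated jointly.
# Before block 2: AdaptiveAvgPool2d -> (2, 1, 512) each, concatenated to (2, 2, 512).
# Blocks 2-3 and the final LayerNorm keep that shape, so encoded is (2, 2, 512).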
models/mlp.py
ADDED
@@ -0,0 +1,56 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import logging
import math

from os.path import join as pjoin

import torch
import torch.nn as nn
import numpy as np

from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
from torch.nn.modules.utils import _pair
from scipy import ndimage

import models.configs as configs
from models.attention import Attention
from models.embed import Embeddings

import pdb


def swish(x):
    return x * torch.sigmoid(x)


ACT2FN = {"gelu": torch.nn.functional.gelu, "relu": torch.nn.functional.relu, "swish": swish}


class Mlp(nn.Module):
    def __init__(self, config):
        super(Mlp, self).__init__()
        self.fc1 = Linear(config.hidden_size, config.transformer["mlp_dim"])
        self.fc2 = Linear(config.transformer["mlp_dim"], config.hidden_size)
        self.act_fn = ACT2FN["gelu"]
        self.dropout = Dropout(config.transformer["dropout_rate"])

        self._init_weights()

    def _init_weights(self):
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.normal_(self.fc1.bias, std=1e-6)
        nn.init.normal_(self.fc2.bias, std=1e-6)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act_fn(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        return x
models/modeling.py
ADDED
@@ -0,0 +1,57 @@
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import logging
import math
from os.path import join as pjoin
import torch
import torch.nn as nn
import numpy as np
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d, LayerNorm
from torch.nn.modules.utils import _pair
from scipy import ndimage
import models.configs as configs
from models.attention import Attention
from models.embed import Embeddings
from models.mlp import Mlp
from models.block import Block
from models.encoder import Encoder


class Transformer(nn.Module):
    def __init__(self, config, img_size, vis, mm):
        super(Transformer, self).__init__()
        self.encoder = Encoder(config, vis, mm)

    def forward(self, input_ids, txt=None):
        # input_ids are pre-extracted image features; txt are text features.
        text = txt
        encoded = self.encoder(input_ids, text)
        return encoded


class PATHOLOGICAL_CLASSFIER(nn.Module):
    def __init__(self, config, img_size=224, num_classes=2, vis=True, mm=True):
        super(PATHOLOGICAL_CLASSFIER, self).__init__()
        self.num_classes = num_classes
        self.transformer1 = Transformer(config, img_size, vis=True, mm=mm)
        self.transformer2 = Transformer(config, img_size, vis=True, mm=mm)
        self.head1 = Linear(config.hidden_size, num_classes)
        self.head2 = Linear(config.hidden_size, num_classes)

    def forward(self, x, txt=None):
        # Two parallel transformers and classification heads, one per target.
        x1 = self.transformer1(x, txt)
        logits_1 = self.head1(torch.mean(x1, dim=1))

        x2 = self.transformer2(x, txt)
        logits_2 = self.head2(torch.mean(x2, dim=1))

        return logits_1, logits_2


CONFIGS = {
    'PATHOLOGICAL_CLASSFIER': configs.get_IRENE_config(),
}
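A minimal end-to-end sketch with random tensors standing in for the pickled features; the shapes are our assumptions, and the two heads line up with the two targets (target_os, target_dfs) read in inference.py:

import torch
from models.modeling import PATHOLOGICAL_CLASSFIER, CONFIGS

config = CONFIGS["PATHOLOGICAL_CLASSFIER"]
model = PATHOLOGICAL_CLASSFIER(config, num_classes=2, vis=True, mm=True).eval()

imgs = torch.randn(1, 196, 512)   # pre-extracted image features
txt  = torch.randn(1, 40, 512)    # pre-extracted text features

with torch.no_grad():
    logits_os, logits_dfs = model(imgs, txt)
print(logits_os.shape, logits_dfs.shape)   # torch.Size([1, 2]) torch.Size([1, 2])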