import warnings
from collections import OrderedDict

import gradio as gr
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification, AutoTokenizer, set_seed

warnings.filterwarnings('ignore')
set_seed(4)  
device = "cpu"
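# ESM-2 protein language model checkpoint (the 150M-parameter esm2_t30_150M_UR50D);
# a local copy of the checkpoint files is assumed to be available under this name.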
model_checkpoint = "esm2_t30_150M_UR50D"

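# Classification model: a pretrained ESM-2 backbone whose 320-dimensional output is
# treated as a sequence embedding and passed through a small fully connected head
# (320 -> 256 -> 128 -> 64 -> 2 classes).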
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        # num_labels=320 makes the ESM-2 classification head emit a 320-dimensional
        # vector, which serves as the input feature for the layers below.
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=320)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(320, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.output_layer = nn.Linear(64, 2)
        # Dropout probability 0: effectively a no-op at inference time.
        self.dropout = nn.Dropout(0)

    def forward(self, x):
        # The ESM-2 backbone is used as a frozen feature extractor.
        with torch.no_grad():
            bert_output = self.bert(input_ids=x['input_ids'].to(device),
                                    attention_mask=x['attention_mask'].to(device))
        output_feature = self.dropout(bert_output["logits"])
        output_feature = self.relu(self.bn1(self.fc1(output_feature)))
        output_feature = self.relu(self.bn2(self.fc2(output_feature)))
        output_feature = self.relu(self.bn3(self.fc3(output_feature)))
        output_feature = self.output_layer(output_feature)
        # Class probabilities over {non-ACE, ACE}.
        return torch.softmax(output_feature, dim=1)

def Kmers_funct(seq, num):
    # Return all overlapping k-mers of length `num` from every sequence in `seq`.
    l = []
    for a in seq:
        for index in range(len(a)):
            t = a[index:index + num]
            if len(t) == num:
                l.append(t)
    return l

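# Score one input string. Sequences longer than 30 residues are split into all
# 2- to 10-mers and every fragment is scored; shorter sequences are scored
# directly. All predictions are written to output.csv.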
def ACE(file):
    # `file` is the raw peptide/protein string from the Gradio text box.
    test_seq = file
    seq_len = len(test_seq)
    all_seqs = []
    if seq_len > 30:
        for j in range(2, 11):
            all_seqs.extend(Kmers_funct([test_seq], j))
    else:
        all_seqs.append(test_seq)

    # Load the trained classifier weights (CPU only) and the matching tokenizer.
    model = MyModel()
    model.load_state_dict(torch.load("best_model.pth", map_location=torch.device('cpu')), strict=False)
    model = model.to(device)
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
    max_len = 30

    seq_all = []
    output_all = []
    probability_all = []
    for seq in tqdm(all_seqs):
        test_data = tokenizer(seq, max_length=max_len, padding="max_length",
                              truncation=True, return_tensors='pt')
        out_probability = []
        with torch.no_grad():
            predict = model(test_data)
            # Predicted class = argmax over the two softmax outputs; the reported
            # probability is the softmax value of that class.
            out_probability.extend(np.max(np.array(predict.cpu()), axis=1).tolist())
            test_argmax = np.argmax(np.array(predict.cpu()), axis=1).tolist()
            id2str = {0: "non-ACE", 1: "ACE"}
            output = id2str[test_argmax[0]]
            probability = out_probability[0]
            seq_all.append(seq)
            output_all.append(output)
            probability_all.append(probability)

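    # Collect the per-sequence predictions and export them as a downloadable CSV.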
    summary = OrderedDict()
    summary['Seq'] = seq_all
    summary['Class'] = output_all
    summary['Probability'] = probability_all
    summary_df = pd.DataFrame(summary)
    summary_df.to_csv('output.csv', index=False)
    
    # For long inputs only the per-fragment CSV is meaningful, so the class and
    # probability text boxes show "None"; for a single short sequence they show
    # its prediction directly.
    if seq_len > 30:
        out_text = "None"
        out_prob = "None"
    else:
        out_text = output
        out_prob = probability

    return 'output.csv', out_text, out_prob

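# Gradio front end: one text box for the input sequence, and three outputs
# (the results CSV, the predicted class, and its probability). The page
# description is read from ACE.md.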
with open("ACE.md", "r") as f:
    description = f.read()
iface = gr.Interface(fn=ACE,
                     title="🏹DeepACE",
                     inputs=gr.Textbox(show_label=False, placeholder="Enter peptide or protein", lines=4),
                     outputs=["file",
                              gr.Textbox(show_label=False, placeholder="class", lines=1),
                              gr.Textbox(show_label=False, placeholder="probability", lines=1)],
                     description=description)
iface.launch()
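
# To run this demo locally (assuming best_model.pth, ACE.md and the ESM-2
# checkpoint directory are available alongside the script), install torch,
# transformers, gradio, pandas, numpy and tqdm, then execute the script;
# Gradio serves the interface on http://127.0.0.1:7860 by default.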