import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import numpy as np
import pandas as pd
import re
from Bio.Seq import Seq
from collections import OrderedDict
from transformers import set_seed
import random
import gradio as gr
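
# Fix every relevant RNG (transformers, torch, numpy, random) so repeated runs give identical results.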
def setup_seed(seed):
    set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True


setup_seed(4)
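
# Inference runs on CPU; facebook/esm2_t6_8M_UR50D is the small 6-layer, ~8M-parameter ESM-2
# checkpoint that supplies the tokenizer and the pretrained backbone.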
device = "cpu"
model_checkpoint = "facebook/esm2_t6_8M_UR50D"
config = AutoConfig.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
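
# Gradio callback: takes the path of an uploaded FASTA file, scores every sequence with the
# fine-tuned regression model, and returns a CSV of predictions for download.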
def conotoxinfinder(files):
    # Read sequences from the uploaded FASTA file, skipping header lines.
    seqs = []
    with open(files, 'r') as fr:
        for line in fr:
            if not line.startswith('>'):
                line = line.replace('\n', '').replace(' ', '')
                if line.islower():
                    # Lower-case lines are treated as nucleotide sequences and translated to protein.
                    seqs.append(str(Seq(line).translate()))
                else:
                    seqs.append(line)

    # Single-output regression head on top of the ESM-2 backbone, restored from the fine-tuned weights.
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1)
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model = model.to(device)
    model.eval()  # disable dropout so inference is deterministic

    value_all = []
    for seq in seqs:
        tokenized = tokenizer(seq, return_tensors='pt').to(device)
        with torch.no_grad():
            output = model(**tokenized)
        # Exponentiate the single regression logit to obtain the reported value.
        value_all.append(np.exp(output["logits"][0].item()))

    summary = OrderedDict()
    summary['Seq'] = seqs
    summary['Value'] = value_all
    summary_df = pd.DataFrame(summary)
    summary_df.to_csv('output.csv', index=False)
    return 'output.csv'
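
# The accompanying Markdown file supplies the description text shown in the Gradio UI.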
with open("conotoxinfinder.md", "r") as f:
    description = f.read()
iface = gr.Interface(
    fn=conotoxinfinder,
    title="ConotoxinFinder α7 regression",
    inputs=["file"],
    outputs="file",
    description=description,
)
iface.launch()