oucgc1996 committed on
Commit
e41678c
1 Parent(s): fcc0132

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +4 -4
  2. app.py +70 -0
  3. best_model.pth +3 -0
  4. conotoxinfinder.md +0 -0
  5. requirements.txt +3 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: ConotoxinFinder Regression
3
- emoji: 😻
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.39.0
8
  app_file: app.py
 
1
  ---
2
+ title: ConotoxinFinder nAChRs
3
+ emoji: 💻
4
+ colorFrom: purple
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 4.39.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from transformers import AutoTokenizer,AutoModelForSequenceClassification,AutoConfig
4
+ from torchcrf import CRF
5
+ import numpy as np
6
+ import pandas as pd
7
+ import re
8
+ from Bio.Seq import Seq
9
+ import matplotlib.pyplot as plt
10
+ from collections import OrderedDict
11
+ from transformers import set_seed
12
+ import random
13
+ import gradio as gr
14
+
15
def setup_seed(seed):
    """Seed every random number generator the app touches.

    The same ``seed`` is handed, in order, to transformers' ``set_seed``,
    torch (CPU, then all CUDA devices), NumPy and the stdlib ``random``
    module. Afterwards cuDNN's ``deterministic`` flag is switched on.

    Args:
        seed: Value passed unchanged to each seeding function.
    """
    seeders = (
        set_seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True
22
# Fix all RNG seeds once at import time, before anything else is built.
setup_seed(4)

# Inference device and the Hugging Face checkpoint used as the backbone.
device = "cpu"
model_checkpoint = "facebook/esm2_t6_8M_UR50D"

# Configuration and tokenizer are loaded once and shared by every request.
config = AutoConfig.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
29
+
30
+
31
def _read_sequences(path):
    """Collect the sequences listed in a FASTA-style text file.

    Lines starting with ``>`` are treated as headers and skipped. Every other
    non-blank line becomes one sequence. A line made up solely of lowercase
    letters is passed through ``Bio.Seq.Seq.translate`` (presumably because
    lowercase input is meant to be nucleotide data -- confirm with the model
    authors); any other line is kept verbatim.

    Args:
        path: Path of the text file to read.

    Returns:
        list[str]: One sequence per retained input line, in file order.
    """
    seqs = []
    # The context manager closes the handle even if translation raises.
    with open(path, 'r') as handle:
        for raw_line in handle:
            if raw_line.startswith('>'):
                continue
            # Remove all whitespace (spaces, tabs, CR and LF) so files with
            # Windows line endings do not leak '\r' into the sequences.
            residues = ''.join(raw_line.split())
            if not residues:
                # Blank lines used to be appended as empty sequences.
                continue
            if residues.islower():
                seqs.append(str(Seq(residues).translate()))
            else:
                seqs.append(residues)
    return seqs


def conotoxinfinder(files):
    """Score every sequence in an uploaded file and write the results to CSV.

    The sequences are read from ``files``, a one-label sequence-classification
    model is built from ``model_checkpoint`` and loaded with the weights in
    ``best_model.pth``, and each sequence is scored individually. The value
    stored for a sequence is ``exp`` of the model's single logit.

    Args:
        files: Path of the uploaded text file (handed to ``open``).

    Returns:
        str: ``'output.csv'``, the file written in the working directory with
        the columns ``Seq`` and ``Value``.
    """
    seqs = _read_sequences(files)

    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1)
    # map_location keeps loading functional on a CPU-only host even when the
    # checkpoint was serialized from a GPU.
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model = model.to(device)
    # Make inference mode explicit so dropout can never be active here.
    model.eval()

    value_all = []
    with torch.no_grad():
        for seq in seqs:
            encoded = tokenizer(seq, return_tensors='pt').to(device)
            prediction = model(**encoded)
            value_all.append(np.exp(prediction["logits"][0].item()))

    summary = OrderedDict()
    summary['Seq'] = seqs
    summary['Value'] = value_all
    summary_df = pd.DataFrame(summary)
    summary_df.to_csv('output.csv', index=False)
    return 'output.csv'
60
+
61
# Load the Markdown text that is passed to the interface as its description.
with open("conotoxinfinder.md", "r") as md_file:
    description = md_file.read()

# One uploaded file in, one generated file out.
iface = gr.Interface(
    fn=conotoxinfinder,
    title="ConotoxinFinder α7 regression",
    inputs=["file"],
    outputs="file",
    description=description,
)
iface.launch()
best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede487d153ed22a196a8f45fe31bedb334bba2bc8579ee5554a655422a8b4983
3
+ size 31413855
conotoxinfinder.md ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ numpy
2
+ torch
3
+ pandas