miladansari committed
Commit 6933624 · 1 Parent(s): 5ff079e
Upload testforhuggingface.py
Browse files: testforhuggingface.py (+65 -0)

testforhuggingface.py ADDED
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
"""testforhuggingface.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1w2iR_ooTp26Ng1TtfIv8_6bOp2WL9Xa9
"""

# Commented out IPython magic to ensure Python compatibility.
# %cd drive/MyDrive/protein-bert/proteinbert/

# Shell commands (Colab/Jupyter only; run them without the leading '!' in a terminal).
!pip install git+'https://github.com/miladansari/protein_bert.git'
!git clone https://github.com/nadavbra/shared_utils.git

# Import related libraries.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import datetime
from pandas import read_csv
# %load_ext tensorboard

# Read the benchmark files.
test = read_csv('/content/drive/MyDrive/protein-bert/BENCHMARK_DIR/hemolytic.test.csv', skipinitialspace=True)
train = read_csv('/content/drive/MyDrive/protein-bert/BENCHMARK_DIR/hemolytic.train.csv', skipinitialspace=True)

seqs = train['seq']
seqs_test = test['seq']

# Set the sequence length to 37 (the longest sequence in the dataset is 35,
# and the model adds <start> and <end> tokens to each sequence).
seq_len = 37
batch_size = 32
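# A hedged aside (not in the original script): rather than hard-coding 37, the
# same value can be derived from the data itself; the "+ 2" accounts for the
# <start> and <end> tokens added around each sequence.
# seq_len = max(seqs.str.len().max(), seqs_test.str.len().max()) + 2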
from proteinbert import load_pretrained_model
from proteinbert.conv_and_global_attention_model import get_model_with_hidden_layers_as_outputs

# Load the pretrained model.
pretrained_model_generator, input_encoder = load_pretrained_model(local_model_dump_dir='/content/drive/MyDrive/protein-bert/proteinbert')
model = get_model_with_hidden_layers_as_outputs(pretrained_model_generator.create_model(seq_len))
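# Hedged alternative (an assumption about proteinbert's defaults, not part of
# the original script): if no local dump is available, load_pretrained_model()
# can reportedly be called without arguments to download the pretrained weights:
# pretrained_model_generator, input_encoder = load_pretrained_model()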
# Extract local (per-residue) and global (per-sequence) representations (embeddings).
X = input_encoder.encode_X(seqs, seq_len)
local_representations, global_representations = model.predict(X, batch_size=batch_size)

X_test = input_encoder.encode_X(seqs_test, seq_len)
local_representations_test, global_representations_test = model.predict(X_test, batch_size=batch_size)
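# Sanity check (an addition, not in the original script): local representations
# are per-residue, shape (num_seqs, seq_len, hidden_dim); global representations
# are per-sequence, shape (num_seqs, global_dim).
print('local:', local_representations.shape, 'global:', global_representations.shape)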
# Simple classifier on top of the flattened per-residue embeddings.
model_D = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=local_representations[0].shape),
    tf.keras.layers.Dense(1, activation='sigmoid')])

model_D.load_weights('/content/drive/MyDrive/weights/model_D_weights')
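# A hedged sketch of how the loaded classifier could be evaluated (not part of
# the original script; it assumes the benchmark CSVs carry the binary target in
# a 'label' column).
model_D.compile(loss='binary_crossentropy', optimizer='adam',
                metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
loss, acc, auc = model_D.evaluate(local_representations_test, test['label'].values,
                                  batch_size=batch_size)
print(f'test accuracy: {acc:.3f}, test AUC: {auc:.3f}')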