miladansari committed on
Commit
6933624
·
1 Parent(s): 5ff079e

Upload testforhuggingface.py

Browse files
Files changed (1) hide show
  1. testforhuggingface.py +65 -0
testforhuggingface.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""testforhuggingface.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1w2iR_ooTp26Ng1TtfIv8_6bOp2WL9Xa9

Hemolytic-peptide classification using ProteinBERT embeddings:

1. Load a pretrained ProteinBERT model from a local dump directory.
2. Encode the train/test peptide sequences and extract their local
   (per-residue) and global (per-sequence) representations.
3. Run a small pre-trained dense classifier over the flattened local
   representations (weights loaded from Google Drive).

Intended to run in Google Colab with Drive mounted at /content/drive.
"""

# Commented out IPython magic to ensure Python compatibility.
# %cd drive/MyDrive/protein-bert/proteinbert/
# %load_ext tensorboard

# NOTE(review): these were IPython shell escapes (`!...`) in the notebook.
# They are not valid Python statements in a .py file, so they are kept as
# comments here — run them manually (or via subprocess) before this script:
# !pip install git+'https://github.com/miladansari/protein_bert.git'
# !git clone https://github.com/nadavbra/shared_utils.git

import tensorflow as tf
from pandas import read_csv

from proteinbert import load_pretrained_model
from proteinbert.conv_and_global_attention_model import get_model_with_hidden_layers_as_outputs

# ---------------------------------------------------------------------------
# Data: each CSV is expected to have a 'seq' column of peptide sequences.
# ---------------------------------------------------------------------------
test = read_csv('/content/drive/MyDrive/protein-bert/BENCHMARK_DIR/hemolytic.test.csv', skipinitialspace=True)
train = read_csv('/content/drive/MyDrive/protein-bert/BENCHMARK_DIR/hemolytic.train.csv', skipinitialspace=True)

seqs = train['seq']
seqs_test = test['seq']

# Sequence length is 37: the longest sequence in the dataset is 35, and the
# encoder adds <START> and <END> tokens around each sequence.
seq_len = 37
batch_size = 32

# ---------------------------------------------------------------------------
# Pretrained ProteinBERT model.
# ---------------------------------------------------------------------------
pretrained_model_generator, input_encoder = load_pretrained_model(
    local_model_dump_dir='/content/drive/MyDrive/protein-bert/proteinbert')
model = get_model_with_hidden_layers_as_outputs(pretrained_model_generator.create_model(seq_len))

# ---------------------------------------------------------------------------
# Extract embeddings: predict() returns (local, global) representations.
# ---------------------------------------------------------------------------
X = input_encoder.encode_X(seqs, seq_len)
local_representations, global_representations = model.predict(X, batch_size=batch_size)

X_test = input_encoder.encode_X(seqs_test, seq_len)
local_representations_test, global_representations_test = model.predict(X_test, batch_size=batch_size)

# ---------------------------------------------------------------------------
# Simple classifier: a single sigmoid unit over the flattened local
# representations, with pre-trained weights loaded from Drive.
# ---------------------------------------------------------------------------
model_D = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=local_representations[0].shape),
    tf.keras.layers.Dense(1, activation='sigmoid')])

model_D.load_weights('/content/drive/MyDrive/weights/model_D_weights')