"""testforhuggingface.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1w2iR_ooTp26Ng1TtfIv8_6bOp2WL9Xa9 |
|
""" |
|
# Install ProteinBERT (from this fork) and clone its shared_utils dependency.
!pip install git+https://github.com/miladansari/protein_bert.git
!git clone https://github.com/nadavbra/shared_utils.git
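
# Hedged note: if the proteinbert import below fails with a missing
# shared_utils module, the cloned directory may need to be added to the
# import path (depends on the repo layout):
# import sys; sys.path.append('shared_utils')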
|
import numpy as np
import tensorflow as tf
from pandas import read_csv
|
# Load the hemolytic peptide train/test splits (one peptide per row in the
# 'seq' column).
test = read_csv('hemolytic.test.csv', skipinitialspace=True)
train = read_csv('hemolytic.train.csv', skipinitialspace=True)

seqs = train['seq']
seqs_test = test['seq']
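
# Assumption: the CSVs also carry a binary 'label' column (1 = hemolytic);
# adjust the column name to match the actual files. The targets are only
# needed to evaluate the classifier head loaded further down.
labels = train['label']
labels_test = test['label']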
|
# ProteinBERT's encoder adds <START>/<END> tokens, so seq_len must be at least
# the longest peptide length plus 2.
seq_len = 37
batch_size = 32
|
from proteinbert import load_pretrained_model
from proteinbert.conv_and_global_attention_model import get_model_with_hidden_layers_as_outputs

# Load the pretrained ProteinBERT model generator and its input encoder, then
# build a model whose outputs are the concatenated hidden layers: local
# (per-residue) and global (per-sequence) representations.
pretrained_model_generator, input_encoder = load_pretrained_model(local_model_dump_dir='proteinbert_preModel')
model = get_model_with_hidden_layers_as_outputs(pretrained_model_generator.create_model(seq_len))
|
# Encode the sequences and extract fixed representations from the frozen
# pretrained model for both splits.
X = input_encoder.encode_X(seqs, seq_len)
local_representations, global_representations = model.predict(X, batch_size=batch_size)

X_test = input_encoder.encode_X(seqs_test, seq_len)
local_representations_test, global_representations_test = model.predict(X_test, batch_size=batch_size)
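
# Sanity check: local representations are per residue, global ones per
# sequence, so the shapes should be (n_seqs, seq_len, d_local) and
# (n_seqs, d_global), where the widths depend on the pretrained model.
print(local_representations.shape, global_representations.shape)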
|
# Classifier head: flatten a peptide's per-residue local representations and
# map them to a single sigmoid unit for binary hemolytic prediction.
model_D = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=local_representations[0].shape),
    tf.keras.layers.Dense(1, activation='sigmoid')])

# Restore the previously trained weights of the head.
model_D.load_weights('/model_D_weights')
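
# A minimal evaluation sketch, assuming the head was trained on the flattened
# local representations and that labels_test (see the assumed 'label' column
# above) holds the binary targets. Compiling is needed so evaluate() can
# report metrics; the optimizer default is irrelevant here.
model_D.compile(loss='binary_crossentropy', metrics=['accuracy'])
loss, acc = model_D.evaluate(local_representations_test, labels_test, batch_size=batch_size)
print(f'test accuracy: {acc:.3f}')

# Per-peptide hemolysis probabilities:
preds = model_D.predict(local_representations_test, batch_size=batch_size)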
|