File size: 2,868 Bytes
2116269 febe13d 2116269 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
from os import path
import streamlit as st
# import pickle
# from tensorflow import keras
import tensorflow as tf
import torch
from torch import nn
from transformers import BertModel, BertTokenizer
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Name of the pretrained BERT checkpoint handed to from_pretrained().
MODEL_NAME = 'bert-base-cased'
# Build the Sentiment Classifier class
class SentimentClassifier(nn.Module):
    """Pretrained BERT encoder followed by dropout and a linear output layer.

    The sub-module attribute names (``bert``, ``drop``, ``out``) are part of
    the module's state-dict keys, so they must stay as they are for saved
    weights to keep loading.
    """

    def __init__(self, n_classes):
        """Create the encoder, the dropout layer and the output layer.

        Args:
            n_classes: Number of output features of the final linear layer.
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        hidden_size = self.bert.config.hidden_size
        self.out = nn.Linear(hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and map its pooled output through the linear head.

        Args:
            input_ids: Token ids forwarded to the BERT encoder.
            attention_mask: Attention mask forwarded to the BERT encoder.

        Returns:
            The output of the linear layer applied to the pooled encoder
            output after dropout.
        """
        # return_dict=False makes the encoder return a plain tuple; only the
        # second element (the pooled output) is used.
        _sequence_output, pooled = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        dropped = self.drop(pooled)
        return self.out(dropped)
# from keras_preprocessing.sequence import pad_sequences
# def predict(ham_spam):
# model = load_model(r'test_HSmodel_r.h5')
# with open('tokenizer.pickle','rb') as handle:
# tokenizer = pickle.load(handle)
# tokenizer.fit_on_texts(ham_spam)
# x_1 = tokenizer.texts_to_sequences([ham_spam])
# x_1 = pad_sequences(x_1, maxlen=525)
# predictions = model.predict(x_1)[0][0]
# return predictions
# Location of the saved classifier weights, resolved relative to this file so
# it does not depend on the current working directory. The file is read with
# torch.load() further down, despite its ".h5" extension.
MODEL_PATH = path.join(path.dirname(__file__), "bert_model.h5")
@st.cache_resource
def load_model_and_tokenizer():
    """Build the classifier, load its saved weights and create the tokenizer.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned objects instead of rebuilding them on every script rerun.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has three output classes,
        is in evaluation mode and lives on the module-level ``device``.

    Raises:
        FileNotFoundError: If the weights file at ``MODEL_PATH`` is missing.
    """
    model = SentimentClassifier(3)
    # NOTE(review): torch.load() unpickles the file, so MODEL_PATH must only
    # ever point at a trusted checkpoint. Consider passing weights_only=True
    # once the minimum supported torch version allows it.
    # The weights are deserialized onto the CPU first so the checkpoint loads
    # even on hosts without a GPU.
    state_dict = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    # Keep the model on the same device the rest of the module selects;
    # leaving it on the CPU while inputs go to CUDA raises a device-mismatch
    # error on GPU hosts.
    model.to(device)
    model.eval()
    # Use the shared constant so the tokenizer can never drift away from the
    # checkpoint the encoder was built from.
    tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
    return model, tokenizer
def predict(content):
    """Classify the sentiment of a piece of text.

    Args:
        content: The text to classify.

    Returns:
        One of ``"negative"``, ``"neutral"`` or ``"positive"``.
    """
    model, tokenizer = load_model_and_tokenizer()
    # Follow the device the cached model actually lives on. Choosing CUDA
    # independently here would place the inputs on a different device from a
    # model that was loaded onto the CPU and make the forward pass fail.
    model_device = next(model.parameters()).device
    encoded_review = tokenizer.encode_plus(
        content,
        max_length=160,
        add_special_tokens=True,
        return_token_type_ids=False,
        # Explicit replacements for the deprecated pad_to_max_length=True:
        # pad short inputs and cut long ones to exactly max_length tokens.
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = encoded_review["input_ids"].to(model_device)
    attention_mask = encoded_review["attention_mask"].to(model_device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_ids, attention_mask)
    # A single text was encoded, so the argmax holds exactly one class index.
    class_index = int(torch.argmax(logits, dim=1).item())
    # NOTE(review): the label order presumably matches the class ids used
    # during training -- confirm against the training code.
    class_names = ["negative", "neutral", "positive"]
    return class_names[class_index]
def main():
    """Render the Streamlit page and display the prediction for the input.

    The result is shown only after the button has been clicked, and blank
    input produces a warning instead of being sent to the model.
    """
    # giving a title to our page
    st.title("Sentiment detection")
    contents = st.text_area("Please enter reviews/sentiment/setences/contents:")
    # Create a prediction button
    # NOTE(review): the label still mentions "Spam Detection" although the
    # page classifies sentiment; it is left unchanged here because it is
    # user-facing text -- consider renaming it.
    if st.button("Analyze Spam Detection Result"):
        if contents.strip():
            st.success(predict(contents))
        else:
            st.warning("Please enter some text to analyze.")
# Build the page only when this file is executed as the main script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()
|