# Winnie-Kay's picture
# Update app.py
# c00a4d2
import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Names/identifiers of the two checkpoints handed to `from_pretrained`
model1_name = "Winnie-Kay/Sentiment-Analysis-Roberta-bases"
model2_name = "Winnie-Kay/Finetuned_BertModel_SentimentAnalysis"

# Load each checkpoint's tokenizer first and then its sequence-classification
# model; this happens once per execution of the module.
tokenizer1, model1 = (
    AutoTokenizer.from_pretrained(model1_name),
    AutoModelForSequenceClassification.from_pretrained(model1_name),
)
tokenizer2, model2 = (
    AutoTokenizer.from_pretrained(model2_name),
    AutoModelForSequenceClassification.from_pretrained(model2_name),
)
# Define a function to preprocess the text data
def preprocess(text: str) -> str:
    """Mask user mentions and links in *text*.

    The text is split on single spaces. Every token that starts with ``@``
    and is longer than one character becomes ``@user``; every token that
    starts with ``http`` becomes ``http``. All other tokens are kept as-is.

    Args:
        text: Raw input text.

    Returns:
        The tokens re-joined with single spaces. Because the split is on a
        literal ``" "``, runs of spaces in the input are preserved.
    """
    new_text = []
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            # A lone '@' is not a mention and is left untouched.
            token = '@user'
        elif token.startswith('http'):
            token = 'http'
        new_text.append(token)
    return " ".join(new_text)
# Define a function to perform sentiment analysis on the input text using model 1
def sentiment_analysis_model1(text):
    """Score *text* with model 1 and return per-label probabilities.

    The text is passed through ``preprocess``, tokenized with ``tokenizer1``
    and fed to ``model1``. A softmax over the logits of the single input
    turns them into a probability distribution.

    Args:
        text: Raw input text.

    Returns:
        A dict mapping ``'Negative'`` and ``'Positive'`` to a ``float``
        probability.
    """
    text = preprocess(text)
    # truncation=True asks the tokenizer to cut inputs longer than its
    # configured maximum length instead of passing them on unbounded.
    encoded_input = tokenizer1(text, return_tensors='pt', truncation=True)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model1(**encoded_input)
    # output[0] holds the logits; [0] selects the only sequence in the batch.
    logits = output[0][0]
    probabilities = torch.nn.functional.softmax(logits, dim=0).tolist()
    # NOTE(review): assumes output index 0 is Negative and 1 is Positive;
    # confirm against the checkpoint's label mapping. zip() stops at the
    # shorter sequence, so extra model outputs would be dropped silently.
    labels = ['Negative', 'Positive']
    return {label: float(score) for label, score in zip(labels, probabilities)}
# Define a function to perform sentiment analysis on the input text using model 2
def sentiment_analysis_model2(text):
    """Score *text* with model 2 and return per-label probabilities.

    The text is passed through ``preprocess``, tokenized with ``tokenizer2``
    and fed to ``model2``. A softmax over the logits of the single input
    turns them into a probability distribution.

    Args:
        text: Raw input text.

    Returns:
        A dict mapping ``'Negative'``, ``'Neutral'`` and ``'Positive'`` to a
        ``float`` probability.
    """
    text = preprocess(text)
    # truncation=True asks the tokenizer to cut inputs longer than its
    # configured maximum length instead of passing them on unbounded.
    encoded_input = tokenizer2(text, return_tensors='pt', truncation=True)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model2(**encoded_input)
    # output[0] holds the logits; [0] selects the only sequence in the batch.
    logits = output[0][0]
    probabilities = torch.nn.functional.softmax(logits, dim=0).tolist()
    # NOTE(review): assumes output indices 0/1/2 are Negative/Neutral/Positive;
    # confirm against the checkpoint's label mapping. zip() stops at the
    # shorter sequence, so a mismatch in label count would go unnoticed.
    labels = ['Negative', 'Neutral', 'Positive']
    return {label: float(score) for label, score in zip(labels, probabilities)}
# Define the Streamlit app
def app():
    """Render the sentiment-analysis page.

    Shows a title, a text input and a model selector. When "Submit" is
    clicked the entered text is scored with the selected model and the
    resulting label/probability dict is written to the page. If the input
    is empty or only whitespace, a warning is shown instead of silently
    doing nothing.
    """
    st.title("Sentiment Analysis")
    text_input = st.text_input("Enter text:")
    model_selection = st.selectbox("Select a model:", ["Model 1", "Model 2"])

    # Nothing to do until the user presses the button.
    if not st.button("Submit"):
        return

    # Guard against empty / whitespace-only submissions.
    if not text_input or not text_input.strip():
        st.warning("Please enter some text to analyze.")
        return

    if model_selection == "Model 1":
        scores = sentiment_analysis_model1(text_input)
    else:
        scores = sentiment_analysis_model2(text_input)
    # model_selection is "Model 1" or "Model 2", so the message text matches
    # the selected model.
    st.write(f"{model_selection} predicted scores: {scores}")