# File-viewer page residue (not part of the program): Regino / nasf / 3f37371 / raw / history blame / 5.65 kB
import logging
import re

import joblib
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import streamlit as st
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from wordcloud import WordCloud
# Use the locally installed NLTK stopword corpus when it is present and download
# it only when the lookup fails. Calling nltk.download() unconditionally would
# repeat the download check on every run of this script.
try:
    stop_words = set(stopwords.words("english"))
except LookupError:
    nltk.download("stopwords")
    stop_words = set(stopwords.words("english"))

# Load the trained model and TF-IDF vectorizer.
# NOTE: joblib.load unpickles the files, so they must come from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Load the training dataset. header=None makes pandas treat every row as data,
# so the column names are supplied explicitly.
column_names = ["id", "place", "label", "text"]
df = pd.read_csv("twitter_training.csv", names=column_names, header=None)
# Function to preprocess text
def preprocess_text(text, stopword_set=None):
    """Normalise raw text into lowercase, stopword-free words joined by single spaces.

    Args:
        text: Value to clean. Non-string values are converted with ``str()``;
            ``None`` and float NaN are treated as missing text.
        stopword_set: Optional collection of words to drop. When omitted, the
            module-level ``stop_words`` set is used.

    Returns:
        The cleaned string, or ``""`` when the input is missing or nothing is
        left after cleaning.
    """
    # Missing values would otherwise be stringified into the bogus words
    # "none" / "nan". NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if stopword_set is None:
        stopword_set = stop_words
    # Replace every non-word character with a space. split() then collapses
    # runs of whitespace and trims both ends, so no separate whitespace pass
    # is needed.
    words = re.sub(r"\W", " ", str(text).lower()).split()
    return " ".join(word for word in words if word not in stopword_set)
# Load the validation dataset and compute model metrics.
# The three results default to None so the rest of the script can tell that
# evaluation did not succeed.
accuracy = None
class_report_df = None
cm = None
try:
    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
    # Clean the text with the same preprocess_text() step that
    # predict_sentiment() applies, so the reported metrics describe the
    # pipeline the predictor actually runs.
    X_test = vectorizer.transform(test_df["text"].apply(preprocess_text))
    y_test = test_df["label"]
    y_pred = model.predict(X_test)

    # Model metrics
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

    # Confusion matrix restricted to these three labels, in this row/column order.
    # NOTE(review): elsewhere in this file an "Irrelevant" label is also styled;
    # rows carrying that label would be left out of this matrix - confirm intended.
    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])
except Exception:
    # Evaluation is best-effort: keep the app running, but record the traceback
    # instead of discarding the error silently.
    logging.getLogger(__name__).exception(
        "Could not evaluate the model on twitter_validation.csv"
    )
    accuracy = None
    class_report_df = None
    cm = None
# Function to predict sentiment
def predict_sentiment(user_input):
    """Return the model's predicted label for a single piece of text.

    The input is cleaned with ``preprocess_text``, converted to features by
    the module-level ``vectorizer`` and passed to the module-level ``model``;
    the first (only) element of the prediction is returned.
    """
    features = vectorizer.transform([preprocess_text(user_input)])
    return model.predict(features)[0]
# Sidebar: short description of the app plus the page selector.
# The selected option string is stored in `page`.
sidebar_intro = (
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)
with st.sidebar:
    st.title("πŸ” Sentiment Analysis App")
    st.markdown(sidebar_intro)
    st.header("πŸ“Œ Navigation")
    page = st.radio(
        "Go to:",
        ["πŸ“‚ Dataset", "πŸ“Š Visualizations", "πŸ“ˆ Model Metrics", "πŸ€– Sentiment Predictor"],
    )

# Main area: title and explanation shown above every page.
app_intro = (
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)
st.title("πŸ“’ Twitter Sentiment Analysis")
st.markdown(app_intro)
# Page routing: render whichever page was selected in the sidebar.
# NOTE(review): the emoji prefixes in the labels below look mis-encoded. They
# are kept verbatim because each comparison must match the sidebar radio
# option character-for-character.

# One colour per sentiment label, shared by the pie chart and the bar chart so
# a given label is drawn in the same colour in both.
SENTIMENT_COLORS = {"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}


def _show_figure(fig):
    """Render *fig* in the app, then close it so figures do not pile up across reruns."""
    st.pyplot(fig)
    plt.close(fig)


# Dataset page
if page == "πŸ“‚ Dataset":
    st.header("πŸ“‚ Dataset Preview")
    st.dataframe(df.iloc[49:105])

# Visualization page
elif page == "πŸ“Š Visualizations":
    st.header("πŸ“Š Data Visualizations")

    # Pie chart of sentiments. value_counts() orders labels by frequency, so
    # the colours are looked up per label rather than assigned by position.
    st.subheader("πŸ₯§ Sentiment Distribution")
    label_counts = df["label"].value_counts()
    fig, ax = plt.subplots(figsize=(5, 5))
    label_counts.plot(
        kind="pie",
        autopct="%1.1f%%",
        colors=[SENTIMENT_COLORS.get(label, "lightgray") for label in label_counts.index],
        ax=ax,
    )
    ax.set_title("Sentiment Distribution")
    ax.set_ylabel("")
    _show_figure(fig)

    # Bar chart of sentiment counts
    st.subheader("πŸ“Š Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette=SENTIMENT_COLORS, ax=ax)
    ax.set_xlabel("Sentiment Type")
    ax.set_ylabel("Count")
    ax.set_title("Distribution of Sentiments")
    _show_figure(fig)

    # Word cloud of the most frequent words. Missing texts are dropped first;
    # otherwise astype(str) would turn them into the literal word "nan".
    st.subheader("☁️ Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].dropna().astype(str))
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    _show_figure(fig)

# Model metrics page
elif page == "πŸ“ˆ Model Metrics":
    st.header("πŸ“ˆ Model Performance")

    if accuracy is not None:
        st.write(f"βœ… **Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("⚠️ Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("πŸ“Œ Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("⚠️ Classification report is empty.")

    # cm.any() is False when every cell is zero, which is treated like a
    # missing matrix.
    if cm is not None and cm.any():
        st.subheader("πŸ”₯ Confusion Matrix")
        # Tick labels must stay in the same order as the `labels=` argument
        # used when the confusion matrix was computed.
        cm_labels = ["Positive", "Neutral", "Negative"]
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=cm_labels, yticklabels=cm_labels, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        _show_figure(fig)
    else:
        st.warning("⚠️ Confusion matrix could not be generated.")

# Sentiment predictor page
elif page == "πŸ€– Sentiment Predictor":
    st.header("πŸ€– Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")
    user_input = st.text_area("Type your sentence here:", "")
    if st.button("Analyze Sentiment"):
        # Ignore submissions that contain only whitespace.
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### πŸ” Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")