import joblib
import streamlit as st
import pandas as pd
import re
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Download stopwords if not already available
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))
# Load the trained model and TF-IDF vectorizer
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")
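# Note: sentiment_model.pkl and tfidf_vectorizer.pkl are assumed to come from a separate
# training script; the vectorizer must be the same one used at training time so the
# transformed features line up with what the model expects.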
# Load dataset with manually defined headers
column_names = ["id", "place", "label", "text"]
df = pd.read_csv("twitter_training.csv", names=column_names, header=None)
# Function to preprocess text
def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r"\W", " ", text)  # Remove special characters
    text = re.sub(r"\s+", " ", text).strip()  # Remove extra spaces
    text = " ".join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text
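# Example: preprocess_text("I LOVE this game!!!") returns "love game"
# ("i" and "this" are NLTK English stopwords; punctuation is stripped).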
# Load test dataset and compute model metrics
try:
    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
    X_test = vectorizer.transform(test_df["text"].astype(str))
    y_test = test_df["label"]
    y_pred = model.predict(X_test)

    # Model metrics
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])
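    # The labels argument above pins the row/column order of the matrix so it matches the
    # tick labels used on the heatmap; validation rows with any other label
    # (e.g. "Irrelevant") are not counted.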
except Exception as e:
    accuracy = None
    class_report_df = None
    cm = None
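# If twitter_validation.csv is missing or malformed, the metrics default to None and the
# Model Metrics page shows warnings instead of crashing.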
# Function to predict sentiment
def predict_sentiment(user_input):
    cleaned_text = preprocess_text(user_input)
    text_vector = vectorizer.transform([cleaned_text])
    prediction = model.predict(text_vector)[0]
    return prediction
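# Example (assuming the model was trained on the same label strings):
# predict_sentiment("This update is great") -> "Positive"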
# Sidebar Navigation
st.sidebar.title("Sentiment Analysis App")
st.sidebar.markdown(
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)
st.sidebar.header("Navigation")
page = st.sidebar.radio(
    "Go to:",
    ["Dataset", "Visualizations", "Model Metrics", "Sentiment Predictor"]
)
# App Title and Explanation
st.title("Twitter Sentiment Analysis")
st.markdown(
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)
# Dataset Page
if page == "Dataset":
    st.header("Dataset Preview")
    st.markdown("### Displaying Rows **50-55** from the Training Data:")
    st.dataframe(df.iloc[49:55])
# Visualization Page
elif page == "Visualizations":
    st.header("Data Visualizations")

    # Pie Chart of Sentiments
    st.subheader("Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(5, 5))
    df["label"].value_counts().plot(kind="pie", autopct="%1.1f%%", colors=["green", "gray", "red", "blue"], ax=ax)
    plt.title("Sentiment Distribution")
    plt.ylabel("")
    st.pyplot(fig)

    # Bar Chart of Sentiment Counts
    st.subheader("Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette={"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}, ax=ax)
    plt.xlabel("Sentiment Type")
    plt.ylabel("Count")
    plt.title("Distribution of Sentiments")
    st.pyplot(fig)

    # Word Cloud for Most Frequent Words
    st.subheader("Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].astype(str))
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
# Model Metrics Page
elif page == "Model Metrics":
    st.header("Model Performance")

    if accuracy is not None:
        st.write(f"**Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("Classification report is empty.")

    if cm is not None and cm.any():
        st.subheader("Confusion Matrix")
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Positive", "Neutral", "Negative"], yticklabels=["Positive", "Neutral", "Negative"], ax=ax)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title("Confusion Matrix")
        st.pyplot(fig)
    else:
        st.warning("Confusion matrix could not be generated.")
# Sentiment Predictor Page
elif page == "Sentiment Predictor":
    st.header("Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")

    user_input = st.text_area("Type your sentence here:", "")

    if st.button("Analyze Sentiment"):
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")
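# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py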