File size: 5,720 Bytes
0e876c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import logging
import re

import joblib
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import streamlit as st
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from wordcloud import WordCloud

# Build the English stopword set, downloading the NLTK corpus only when it is
# missing. Streamlit re-executes this script on every user interaction, and an
# unconditional nltk.download() would contact the NLTK index on each rerun.
try:
    stop_words = set(stopwords.words("english"))
except LookupError:
    nltk.download("stopwords")
    stop_words = set(stopwords.words("english"))

# Load the trained model and TF-IDF vectorizer.
# NOTE(review): joblib.load unpickles arbitrary objects; only load artifacts
# that come from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Load the training dataset. header=None makes pandas treat the first line of
# the CSV as data, so the column names are supplied manually here.
column_names = ["id", "place", "label", "text"]
df = pd.read_csv("twitter_training.csv", names=column_names, header=None)

# Function to preprocess text
def preprocess_text(text):
    """Normalise *text* for vectorisation.

    The input is converted to ``str`` and lower-cased, every non-word
    character is replaced by a space, whitespace runs are collapsed, and
    tokens found in the module-level ``stop_words`` set are dropped.

    Returns the remaining tokens joined by single spaces.
    """
    lowered = str(text).lower()
    # Replace anything that is not a letter, digit or underscore with a space.
    without_symbols = re.sub(r"\W", " ", lowered)
    # Squeeze whitespace runs and trim both ends.
    collapsed = re.sub(r"\s+", " ", without_symbols).strip()
    kept_tokens = (token for token in collapsed.split() if token not in stop_words)
    return " ".join(kept_tokens)

# Evaluate the model on the validation set. This is best-effort: if anything
# fails, the three result names are set to None so the Model Metrics page can
# show a warning instead of the app crashing.
try:
    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
    # Clean the tweets with the same preprocess_text step that
    # predict_sentiment applies, so the reported metrics describe the exact
    # pipeline used for live predictions. preprocess_text converts each value
    # to str itself, so missing texts do not need separate handling.
    X_test = vectorizer.transform(test_df["text"].map(preprocess_text))
    y_test = test_df["label"]
    y_pred = model.predict(X_test)

    # Overall accuracy and the per-class report (as a table rounded to 2 dp).
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

    # Confusion matrix restricted to these three labels, in this row/column order.
    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])

except Exception:
    # Keep the failure visible in the server log; previously it was swallowed
    # silently, which made a missing file or a code bug indistinguishable.
    logging.getLogger(__name__).exception("Could not compute model metrics")
    accuracy = None
    class_report_df = None
    cm = None

# Function to predict sentiment
def predict_sentiment(user_input):
    """Return the model's predicted label for one piece of raw text.

    The text is cleaned with ``preprocess_text``, turned into features by the
    module-level ``vectorizer`` and classified by the module-level ``model``.
    """
    features = vectorizer.transform([preprocess_text(user_input)])
    # A single document was submitted, so take the first prediction.
    return model.predict(features)[0]

# Sidebar: app description followed by the page selector.
sidebar = st.sidebar
sidebar_intro = (
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)
sidebar.title("πŸ” Sentiment Analysis App")
sidebar.markdown(sidebar_intro)

# The selected option is compared against these exact strings further down
# to decide which page to render.
page_options = ["πŸ“‚ Dataset", "πŸ“Š Visualizations", "πŸ“ˆ Model Metrics", "πŸ€– Sentiment Predictor"]
sidebar.header("πŸ“Œ Navigation")
page = sidebar.radio("Go to:", page_options)

# Main area: title and a short explanation shown on every page.
app_intro = (
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)
st.title("πŸ“’ Twitter Sentiment Analysis")
st.markdown(app_intro)

# Colour used for each sentiment label. Shared by the pie and bar charts so a
# given label is drawn in the same colour in both.
sentiment_palette = {"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    pyplot keeps every figure it creates alive until it is closed, and this
    script is re-executed on each interaction, so unclosed figures would
    accumulate in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


# Render the page selected in the sidebar.

# Dataset page: a small fixed slice of the training data.
if page == "πŸ“‚ Dataset":
    st.header("πŸ“‚ Dataset Preview")
    st.markdown("### Displaying Rows **50-55** from the Training Data:")
    st.dataframe(df.iloc[49:55])

# Visualization page: label distribution charts and a word cloud.
elif page == "πŸ“Š Visualizations":
    st.header("πŸ“Š Data Visualizations")

    # Pie chart of sentiments. value_counts() orders labels by frequency, so
    # the colours are looked up per label rather than passed as a fixed list.
    st.subheader("πŸ₯§ Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(5, 5))
    label_counts = df["label"].value_counts()
    pie_colors = [sentiment_palette.get(label, "lightgray") for label in label_counts.index]
    label_counts.plot(kind="pie", autopct="%1.1f%%", colors=pie_colors, ax=ax)
    ax.set_title("Sentiment Distribution")
    ax.set_ylabel("")
    _show_figure(fig)

    # Bar chart of sentiment counts.
    st.subheader("πŸ“Š Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette=sentiment_palette, ax=ax)
    ax.set_xlabel("Sentiment Type")
    ax.set_ylabel("Count")
    ax.set_title("Distribution of Sentiments")
    _show_figure(fig)

    # Word cloud of the most frequent words. Missing tweets are dropped first;
    # astype(str) alone would turn them into the literal word "nan".
    st.subheader("☁️ Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].dropna().astype(str))
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    _show_figure(fig)

# Model metrics page: each result is None when the evaluation step failed.
elif page == "πŸ“ˆ Model Metrics":
    st.header("πŸ“ˆ Model Performance")

    if accuracy is not None:
        st.write(f"βœ… **Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("⚠️ Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("πŸ“Œ Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("⚠️ Classification report is empty.")

    # An all-zero matrix is treated the same as a missing one.
    if cm is not None and cm.any():
        st.subheader("πŸ”₯ Confusion Matrix")
        # Tick labels must follow the label order used when cm was computed.
        cm_labels = ["Positive", "Neutral", "Negative"]
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=cm_labels,
            yticklabels=cm_labels,
            ax=ax,
        )
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        _show_figure(fig)
    else:
        st.warning("⚠️ Confusion matrix could not be generated.")

# Sentiment predictor page: classify free text entered by the user.
elif page == "πŸ€– Sentiment Predictor":
    st.header("πŸ€– Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")

    user_input = st.text_area("Type your sentence here:", "")

    if st.button("Analyze Sentiment"):
        # Ignore submissions that contain only whitespace.
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### πŸ” Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")