import joblib
import streamlit as st
import pandas as pd
import re
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Download stopwords if not already available
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))
# Load the trained model and TF-IDF vectorizer
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")
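# Note: sentiment_model.pkl and tfidf_vectorizer.pkl are assumed to come from a separate
# training script; the vectorizer must be the same one used at training time so the
# transformed features line up with what the model expects.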
# Load dataset with manually defined headers
column_names = ["id", "place", "label", "text"]
df = pd.read_csv("twitter_training.csv", names=column_names, header=None)
# Function to preprocess text
def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r"\W", " ", text)  # Remove special characters
    text = re.sub(r"\s+", " ", text).strip()  # Remove extra spaces
    text = " ".join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text
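# Example: preprocess_text("I LOVE this game!!!") returns "love game"
# ("i" and "this" are NLTK English stopwords; punctuation is stripped).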
# Load test dataset and compute model metrics
try:
    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
    X_test = vectorizer.transform(test_df["text"].astype(str))
    y_test = test_df["label"]
    y_pred = model.predict(X_test)

    # Model metrics
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

    # Compute confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])
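    # The labels argument above pins the row/column order of the matrix so it matches the
    # tick labels used on the heatmap; validation rows with any other label
    # (e.g. "Irrelevant") are not counted.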
except Exception as e:
    accuracy = None
    class_report_df = None
    cm = None
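# If twitter_validation.csv is missing or malformed, the metrics default to None and the
# Model Metrics page shows warnings instead of crashing.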
# Function to predict sentiment
def predict_sentiment(user_input):
    cleaned_text = preprocess_text(user_input)
    text_vector = vectorizer.transform([cleaned_text])
    prediction = model.predict(text_vector)[0]
    return prediction
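# Example (assuming the model was trained on the same label strings):
# predict_sentiment("This update is great") -> "Positive"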
# Sidebar Navigation
st.sidebar.title("Sentiment Analysis App")
st.sidebar.markdown(
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)
st.sidebar.header("Navigation")
page = st.sidebar.radio(
    "Go to:",
    ["Dataset", "Visualizations", "Model Metrics", "Sentiment Predictor"]
)
# App Title and Explanation
st.title("Twitter Sentiment Analysis")
st.markdown(
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)
# Dataset Page
if page == "Dataset":
    st.header("Dataset Preview")
    st.markdown("### Displaying Rows **50-55** from the Training Data:")
    st.dataframe(df.iloc[49:55])
# Visualization Page
elif page == "Visualizations":
    st.header("Data Visualizations")

    # Pie Chart of Sentiments
    st.subheader("Sentiment Distribution")
    fig, ax = plt.subplots(figsize=(5, 5))
    df["label"].value_counts().plot(kind="pie", autopct="%1.1f%%", colors=["green", "gray", "red", "blue"], ax=ax)
    plt.title("Sentiment Distribution")
    plt.ylabel("")
    st.pyplot(fig)

    # Bar Chart of Sentiment Counts
    st.subheader("Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette={"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}, ax=ax)
    plt.xlabel("Sentiment Type")
    plt.ylabel("Count")
    plt.title("Distribution of Sentiments")
    st.pyplot(fig)

    # Word Cloud for Most Frequent Words
    st.subheader("Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].astype(str))
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
# Model Metrics Page
elif page == "Model Metrics":
    st.header("Model Performance")

    if accuracy is not None:
        st.write(f"**Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("Classification report is empty.")

    if cm is not None and cm.any():
        st.subheader("Confusion Matrix")
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Positive", "Neutral", "Negative"], yticklabels=["Positive", "Neutral", "Negative"], ax=ax)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title("Confusion Matrix")
        st.pyplot(fig)
    else:
        st.warning("Confusion matrix could not be generated.")
# Sentiment Predictor Page
elif page == "Sentiment Predictor":
    st.header("Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")

    user_input = st.text_area("Type your sentence here:", "")

    if st.button("Analyze Sentiment"):
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")
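# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py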