# Page-status text captured along with the source ("Spaces: Sleeping"); not part of the program.
import logging
import re

import joblib
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import streamlit as st
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from wordcloud import WordCloud
# Fetch the NLTK stopword corpus only when it is missing locally, so a normal
# start-up does not contact the download server on every run of the script.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# Load the trained model and the TF-IDF vectorizer from the working directory.
# NOTE(review): joblib.load unpickles the file contents; only load artefacts
# that come from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# The training CSV is read with header=None, so column names are supplied here.
column_names = ["id", "place", "label", "text"]
df = pd.read_csv("twitter_training.csv", names=column_names, header=None)
def preprocess_text(text):
    """Return *text* normalised for vectorization.

    The value is converted to ``str`` and lower-cased, every non-word
    character is replaced by a space, runs of whitespace are collapsed, and
    tokens found in the module-level ``stop_words`` set are dropped. The
    remaining tokens are joined with single spaces.
    """
    lowered = str(text).lower()
    # Replace every non-word character with a space.
    spaced = re.sub(r"\W", " ", lowered)
    # Squeeze whitespace runs and trim both ends.
    collapsed = re.sub(r"\s+", " ", spaced).strip()
    kept_tokens = (token for token in collapsed.split() if token not in stop_words)
    return " ".join(kept_tokens)
# Evaluate the model on the validation CSV. This is best-effort: if any step
# fails, accuracy, class_report_df and cm are set to None so the rest of the
# app can still run, and the failure is logged with its traceback instead of
# being discarded silently.
logger = logging.getLogger(__name__)
try:
    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
    # NOTE(review): predict_sentiment() cleans its input with preprocess_text()
    # before vectorizing, but the validation text here is vectorized as-is.
    # Confirm which of the two matches how the model was trained.
    X_test = vectorizer.transform(test_df["text"].astype(str))
    y_test = test_df["label"]
    y_pred = model.predict(X_test)

    # Accuracy plus the per-class report as a rounded DataFrame
    # (the report's rows become the DataFrame's rows via the transpose).
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

    # The matrix is restricted to these three labels, in this order.
    # NOTE(review): the visualisation palette also lists an "Irrelevant" label;
    # samples with a label outside this list are not counted here. Confirm
    # that this is intended.
    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])
except Exception:
    logger.exception("Could not compute model metrics from twitter_validation.csv")
    accuracy = None
    class_report_df = None
    cm = None
def predict_sentiment(user_input):
    """Return the model's predicted label for a single piece of text.

    The text is cleaned with ``preprocess_text``, turned into a one-row
    feature matrix by the module-level ``vectorizer``, and passed to the
    module-level ``model``; the first (only) prediction is returned.
    """
    features = vectorizer.transform([preprocess_text(user_input)])
    return model.predict(features)[0]
# Sidebar: app name, a short description, and the page selector.
sidebar = st.sidebar
SIDEBAR_BLURB = (
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)
# The selected entry is stored in `page` and compared against these exact
# strings further down, so they must not be altered here alone.
PAGE_CHOICES = ["π Dataset", "π Visualizations", "π Model Metrics", "π€ Sentiment Predictor"]

sidebar.title("π Sentiment Analysis App")
sidebar.markdown(SIDEBAR_BLURB)
sidebar.header("π Navigation")
page = sidebar.radio("Go to:", PAGE_CHOICES)

# Main area: title and a one-paragraph explanation shown on every page.
APP_INTRO = (
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)
st.title("π’ Twitter Sentiment Analysis")
st.markdown(APP_INTRO)
# Colours keyed by sentiment label, shared by the pie and bar charts so a
# given class is always drawn in the same colour.
SENTIMENT_COLORS = {"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}
# Tick labels for the confusion-matrix heatmap; these must stay in the same
# order as the labels used when the matrix was computed.
CONFUSION_LABELS = ["Positive", "Neutral", "Negative"]


def _show_figure(fig):
    """Render *fig* in the app, then close it so figures do not accumulate."""
    st.pyplot(fig)
    # Figures made with plt.subplots stay registered in pyplot until closed.
    plt.close(fig)


# Dataset page: show a fixed slice of the training data.
if page == "π Dataset":
    st.header("π Dataset Preview")
    st.dataframe(df.iloc[49:105])

# Visualisation page: label distribution (pie and bar) and a word cloud.
elif page == "π Visualizations":
    st.header("π Data Visualizations")

    # Pie chart of sentiments. value_counts() orders labels by frequency, so
    # colours are looked up per label instead of being passed positionally.
    st.subheader("π₯§ Sentiment Distribution")
    label_counts = df["label"].value_counts()
    pie_colors = [SENTIMENT_COLORS.get(label, "lightgray") for label in label_counts.index]
    fig, ax = plt.subplots(figsize=(5, 5))
    label_counts.plot(kind="pie", autopct="%1.1f%%", colors=pie_colors, ax=ax)
    ax.set_title("Sentiment Distribution")
    ax.set_ylabel("")
    _show_figure(fig)

    # Bar chart of sentiment counts.
    st.subheader("π Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette=SENTIMENT_COLORS, ax=ax)
    ax.set_xlabel("Sentiment Type")
    ax.set_ylabel("Count")
    ax.set_title("Distribution of Sentiments")
    _show_figure(fig)

    # Word cloud built from all tweet text joined into one string.
    st.subheader("βοΈ Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].astype(str))
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    _show_figure(fig)

# Model metrics page: each metric is None when evaluation failed earlier, in
# which case a warning is shown in its place.
elif page == "π Model Metrics":
    st.header("π Model Performance")

    if accuracy is not None:
        st.write(f"β **Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("β οΈ Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("π Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("β οΈ Classification report is empty.")

    # An all-zero matrix is treated the same as a missing one.
    if cm is not None and cm.any():
        st.subheader("π₯ Confusion Matrix")
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=CONFUSION_LABELS,
            yticklabels=CONFUSION_LABELS,
            ax=ax,
        )
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        _show_figure(fig)
    else:
        st.warning("β οΈ Confusion matrix could not be generated.")

# Sentiment predictor page: classify text typed by the user.
elif page == "π€ Sentiment Predictor":
    st.header("π€ Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")
    user_input = st.text_area("Type your sentence here:", "")
    if st.button("Analyze Sentiment"):
        # Whitespace-only input is rejected rather than sent to the model.
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### π Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")