"""Streamlit app for browsing a tweet dataset and a trained sentiment model.

The script loads a pickled classifier and TF-IDF vectorizer, reads the
training and validation CSV files, and renders four pages selected from the
sidebar: a dataset preview, label/word visualizations, model metrics on the
validation set, and an interactive sentiment predictor.
"""
import re

import joblib
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import streamlit as st
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from wordcloud import WordCloud

# Files read by the app, relative to the working directory.
MODEL_PATH = "sentiment_model.pkl"
VECTORIZER_PATH = "tfidf_vectorizer.pkl"
TRAINING_CSV = "twitter_training.csv"
VALIDATION_CSV = "twitter_validation.csv"

# The CSV files carry no header row, so the column names are supplied here.
column_names = ["id", "place", "label", "text"]

# Labels shown in the confusion matrix; rows with any other label are left
# out of that matrix (they still count towards accuracy and the report).
SENTIMENT_LABELS = ["Positive", "Neutral", "Negative"]

# One colour per label, shared by the pie chart and the bar chart.
LABEL_COLORS = {
    "Positive": "green",
    "Neutral": "gray",
    "Negative": "red",
    "Irrelevant": "blue",
}
# Colour for a label that is missing from LABEL_COLORS (pie chart only).
FALLBACK_COLOR = "lightgray"

# Sidebar entries; the same strings are compared against the selected page.
PAGE_DATASET = "📂 Dataset"
PAGE_VISUALS = "📊 Visualizations"
PAGE_METRICS = "📈 Model Metrics"
PAGE_PREDICTOR = "🤖 Sentiment Predictor"

_NON_WORD_RE = re.compile(r"\W")


def _load_stop_words():
    """Return the English stop-word set, downloading the corpus if missing."""
    try:
        return set(stopwords.words("english"))
    except LookupError:
        # Only hit the network when the corpus is not installed yet, instead
        # of calling nltk.download on every Streamlit rerun.
        nltk.download("stopwords")
        return set(stopwords.words("english"))


stop_words = _load_stop_words()


# Streamlit re-executes this script on every widget interaction. The cached
# loaders below keep their result for the lifetime of the server process, so
# changed files are only picked up after the cache is cleared or the app is
# restarted.
@st.cache_resource
def _load_artifacts():
    """Load the trained model and the TF-IDF vectorizer from disk."""
    # NOTE(review): joblib.load unpickles the file; only load trusted files.
    return joblib.load(MODEL_PATH), joblib.load(VECTORIZER_PATH)


@st.cache_data
def _load_tweets(path):
    """Read a header-less tweet CSV into a DataFrame using column_names."""
    return pd.read_csv(path, names=column_names, header=None)


model, vectorizer = _load_artifacts()
df = _load_tweets(TRAINING_CSV)


def preprocess_text(text):
    """Normalize text before it is vectorized.

    The input is converted to ``str``, lower-cased, every non-word character
    is replaced by a space, and English stop words are dropped.

    Args:
        text: Any value; it is passed through ``str()`` first.

    Returns:
        The remaining words joined by single spaces (possibly empty).
    """
    text = _NON_WORD_RE.sub(" ", str(text).lower())
    # split() already collapses whitespace runs and trims both ends.
    return " ".join(word for word in text.split() if word not in stop_words)


@st.cache_data
def _evaluate_model(path):
    """Score the model on the CSV at ``path``.

    Returns:
        A tuple ``(accuracy, report_df, matrix)``: the accuracy score, the
        classification report as a DataFrame rounded to two decimals, and
        the confusion matrix restricted to SENTIMENT_LABELS.

    Raises:
        Whatever pandas, the vectorizer, the model or the metric functions
        raise; failures are not cached, so a later rerun tries again.
    """
    test_df = pd.read_csv(path, names=column_names, header=None)
    # Clean the text the same way predict_sentiment does, so the reported
    # metrics describe the pipeline the predictor page actually runs.
    cleaned = test_df["text"].map(preprocess_text)
    y_test = test_df["label"]
    y_pred = model.predict(vectorizer.transform(cleaned))

    report = classification_report(y_test, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).T.round(2)
    matrix = confusion_matrix(y_test, y_pred, labels=SENTIMENT_LABELS)
    return accuracy_score(y_test, y_pred), report_df, matrix


# Evaluation is best-effort: the other pages must keep working when the
# validation file is missing or malformed, so any failure is recorded and
# reported on the metrics page instead of stopping the script.
metrics_error = None
try:
    accuracy, class_report_df, cm = _evaluate_model(VALIDATION_CSV)
except Exception as exc:
    accuracy = None
    class_report_df = None
    cm = None
    metrics_error = f"{type(exc).__name__}: {exc}"


def predict_sentiment(user_input):
    """Return the model's predicted label for ``user_input``.

    The text is cleaned with preprocess_text, vectorized, and passed to the
    model; the first (only) prediction is returned.
    """
    cleaned_text = preprocess_text(user_input)
    text_vector = vectorizer.transform([cleaned_text])
    return model.predict(text_vector)[0]


def _show_figure(fig):
    """Render a Matplotlib figure and close it.

    Figures created through pyplot stay registered until closed; closing
    them here keeps reruns from piling up open figures.
    """
    st.pyplot(fig)
    plt.close(fig)


# Sidebar navigation
st.sidebar.title("🔍 Sentiment Analysis App")
st.sidebar.markdown(
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)

st.sidebar.header("📌 Navigation")
page = st.sidebar.radio(
    "Go to:",
    [PAGE_DATASET, PAGE_VISUALS, PAGE_METRICS, PAGE_PREDICTOR],
)

# App title and explanation
st.title("📢 Twitter Sentiment Analysis")
st.markdown(
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)

# Dataset page
if page == PAGE_DATASET:
    st.header("📂 Dataset Preview")
    st.dataframe(df.iloc[49:105])

# Visualization page
elif page == PAGE_VISUALS:
    st.header("📊 Data Visualizations")

    # Pie chart of sentiments
    st.subheader("🥧 Sentiment Distribution")
    label_counts = df["label"].value_counts()
    # value_counts orders by frequency, so colours are looked up per label
    # rather than assigned by position.
    pie_colors = [
        LABEL_COLORS.get(label, FALLBACK_COLOR) for label in label_counts.index
    ]
    fig, ax = plt.subplots(figsize=(5, 5))
    label_counts.plot(kind="pie", autopct="%1.1f%%", colors=pie_colors, ax=ax)
    ax.set_title("Sentiment Distribution")
    ax.set_ylabel("")
    _show_figure(fig)

    # Bar chart of sentiment counts
    st.subheader("📊 Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette=LABEL_COLORS, ax=ax)
    ax.set_xlabel("Sentiment Type")
    ax.set_ylabel("Count")
    ax.set_title("Distribution of Sentiments")
    _show_figure(fig)

    # Word cloud built from every tweet in the training data
    st.subheader("☁️ Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].astype(str))
    wordcloud = WordCloud(
        width=800, height=400, background_color="white"
    ).generate(text_data)

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    _show_figure(fig)

# Model metrics page
elif page == PAGE_METRICS:
    st.header("📈 Model Performance")

    if metrics_error is not None:
        # Surface the cause; the warnings below only say that data is missing.
        st.error(f"Model evaluation failed: {metrics_error}")

    if accuracy is not None:
        st.write(f"✅ **Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("⚠️ Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("📌 Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("⚠️ Classification report is empty.")

    # An all-zero matrix is treated the same as a missing one.
    if cm is not None and cm.any():
        st.subheader("🔥 Confusion Matrix")
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=SENTIMENT_LABELS,
            yticklabels=SENTIMENT_LABELS,
            ax=ax,
        )
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        _show_figure(fig)
    else:
        st.warning("⚠️ Confusion matrix could not be generated.")

# Sentiment predictor page
elif page == PAGE_PREDICTOR:
    st.header("🤖 Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")

    user_input = st.text_area("Type your sentence here:", "")

    if st.button("Analyze Sentiment"):
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### 🔍 Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")