Spaces:

CristopherWVSU
/

SpamDetection

Sleeping

App Files Files Community

CristopherWVSU committed on Mar 16

Commit

884d5f3

1 Parent(s): 71710c0

Added more models

Browse files

Files changed (13) hide show

LRclassification_report.png +0 -0
LRconfusion_matrix.png +0 -0
LRspam_classifier_model.pkl +3 -0
classification_report.png → MNBclassification_report.png +0 -0
MNBconfusion_matrix.png +0 -0
spam_classifier.pkl → MNBspam_classifier_model.pkl +0 -0
SVM_classification_report.png +0 -0
SVMconfusion_matrix.png +0 -0
SVMspam_classifier.pkl +3 -0
app.py +63 -27
confusion_matrix.png +0 -0
main.ipynb +0 -0
tfidf_vectorizer.pkl +1 -1

LRclassification_report.png ADDED Viewed

LRconfusion_matrix.png ADDED Viewed

LRspam_classifier_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad0ce4dec8e20221e63ff8de41f9528b1ec07878189ab000a09f9607e6470a5
+size 31663

classification_report.png → MNBclassification_report.png RENAMED Viewed

File without changes

MNBconfusion_matrix.png ADDED Viewed

spam_classifier.pkl → MNBspam_classifier_model.pkl RENAMED Viewed

File without changes

SVM_classification_report.png ADDED Viewed

SVMconfusion_matrix.png ADDED Viewed

SVMspam_classifier.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ccca33faa944372b33275ba2fe09b795c1efaf780ee65c6fb6331e0607e8d12
+size 106635

app.py CHANGED Viewed

@@ -5,16 +5,26 @@ import string
 import nltk
 from nltk.corpus import stopwords
-# LOAD THE MODEL AND VECTORIZERS
-model = joblib.load("spam_classifier.pkl")
-vectorizer = joblib.load("tfidf_vectorizer.pkl")
 nltk.download("stopwords")
-# REDUCE THE INPUT TO ITS MOST BASIC FORM
 def preprocess_text(text):
     text = text.lower()
     text = re.sub(r"\d+", "", text)
@@ -23,14 +33,14 @@ def preprocess_text(text):
     words = [word for word in words if word not in stopwords.words("english")]
     return " ".join(words)
-app, model_eval = st.tabs(["Application", "Model Evaluation"])
-# STREAMLIT APP TAB 1
 with app:
     st.title("📩 Spam Detector App")
     st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")
     user_input = st.text_area("Enter your message:")
     if st.button("Check Spam"):
@@ -40,29 +50,55 @@ with app:
             prediction = model.predict(input_vector)
             result = "Spam" if prediction[0] == 1 else "Not Spam"
-            st.success(f"Prediction: {result}")
         else:
             st.warning("Please enter a message to check.")
 with model_eval:
     st.header("Model Evaluation")
-    st.write("The Spam Detection model was trained in order to detect if a message is considered a 'Spam' or 'Not Spam'. The dataset was taken from kaggle.")
-    st.write("dataset by Faisal Qureshi: https://www.kaggle.com/datasets/mfaisalqureshi/spam-email")
-    # CONFUSION MATRIX
     st.title("Confusion Matrix")
-    st.write("The confusion matrix displays the actual values or true labels with the predicted values from the model. With this, we can identify the margin of error the model has. Consider the following when understanding the confusion matrix:")
-    st.write("True Positives (TP): Correctly predicted Spam")
-    st.write("True Negatives (TN): Correctly predicted Not Spam")
-    st.write("False Positives (FP): Predicted Spam but it was actually Not Spam (Type I error)")
-    st.write("False Negatives (FN): Predicted Not Spam but it was actually Spam (Type II error)")
-    st.image("confusion_matrix.png")
-    # EVALUATION MATRICS
     st.title("Evaluation Metrics")
-    st.write("The image below represents the Accuracy, F1 score and the classification report of the model")
-    st.image("classification_report.png")

 import nltk
 from nltk.corpus import stopwords
+# Download stopwords
 nltk.download("stopwords")
+# Sidebar Model Selection
+st.sidebar.title("🔍 Choose Model")
+model_choice = st.sidebar.radio(
+    "Select a model for Spam Detection:",
+    ("Naive Bayes", "Logistic Regression", "Support Vector Machine")
+)
+# Load selected model
+model_paths = {
+    "Naive Bayes": "MNBspam_classifier_model.pkl",
+    "Logistic Regression": "LRspam_classifier_model.pkl",
+    "Support Vector Machine": "SVMspam_classifier.pkl"
+}
+model = joblib.load(model_paths[model_choice])
+vectorizer = joblib.load("tfidf_vectorizer.pkl")
+# Function to preprocess text
 def preprocess_text(text):
     text = text.lower()
     text = re.sub(r"\d+", "", text)
     words = [word for word in words if word not in stopwords.words("english")]
     return " ".join(words)
+# Tabs for Application & Model Evaluation
+app, model_eval = st.tabs(["📩 Application", "📊 Model Evaluation"])
+# Spam Detector Application
 with app:
     st.title("📩 Spam Detector App")
     st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")
     user_input = st.text_area("Enter your message:")
     if st.button("Check Spam"):
             prediction = model.predict(input_vector)
             result = "Spam" if prediction[0] == 1 else "Not Spam"
+            st.success(f"Prediction: {result} ({model_choice})")
         else:
             st.warning("Please enter a message to check.")
+# Model Evaluation Tab
 with model_eval:
     st.header("Model Evaluation")
+    st.write("The Spam Detection model was trained to classify messages as 'Spam' or 'Not Spam'. The dataset was taken from Kaggle.")
+    st.write("Dataset by Faisal Qureshi: [Kaggle Link](https://www.kaggle.com/datasets/mfaisalqureshi/spam-email)")
+    # Confusion Matrix
     st.title("Confusion Matrix")
+    st.write("The confusion matrix displays actual vs. predicted labels. Consider the following when interpreting it:")
+    st.write("- **True Positives (TP):** Correctly predicted Spam")
+    st.write("- **True Negatives (TN):** Correctly predicted Not Spam")
+    st.write("- **False Positives (FP):** Predicted Spam but was actually Not Spam (Type I error)")
+    st.write("- **False Negatives (FN):** Predicted Not Spam but was actually Spam (Type II error)")
+    st.header("Naive Bayes Confusion Matrix")
+    st.write("The image below represents the Confusion Matrix of the Naive Bayes model.")
+    st.image("MNBconfusion_matrix.png")
+    st.header("Logistic Regression Confusion Matrix")
+    st.write("The image below represents the Confusion Matrix of the Logistic Regression model.")
+    st.image("LRconfusion_matrix.png")
+    st.header("SVM Confusion Matrix")
+    st.write("The image below represents the Confusion Matrix of the SVM model.")
+    st.image("SVMconfusion_matrix.png")
+    # Evaluation Metrics
     st.title("Evaluation Metrics")
+    st.write("Evaluation metrics help assess the performance of the spam detector.")
+    st.header("Naive Bayes Evaluation Metrics")
+    st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Naive Bayes model.")
+    st.image("MNBclassification_report.png")
+    st.header("Logistic Regression Evaluation Metrics")
+    st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Logistic Regression model.")
+    st.image("LRclassification_report.png")
+    st.header("SVM Evaluation Metrics")
+    st.write("The image below represents the **Accuracy, F1 score, and classification report** of the SVM model.")
+    st.image("SVM_classification_report.png")
+    # COMPARISON
+    st.header("Comparison")
+    st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the three classification algorithms chosen, Naive Bayes performs the best using this dataset")

confusion_matrix.png DELETED Viewed

Binary file (16.6 kB)

main.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

tfidf_vectorizer.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:856e0ba9a758d06ab564a5675d2f538c180786a5aecba0d03b3ed5c98fb10968
 size 78711

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0b3264f32054f57cdda0912eaec6c6961c77902787d05dfe2255e0d532b5e55
 size 78711