CristopherWVSU committed on
Commit
884d5f3
·
1 Parent(s): 71710c0

Added more models

Browse files
LRclassification_report.png ADDED
LRconfusion_matrix.png ADDED
LRspam_classifier_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad0ce4dec8e20221e63ff8de41f9528b1ec07878189ab000a09f9607e6470a5
3
+ size 31663
classification_report.png → MNBclassification_report.png RENAMED
File without changes
MNBconfusion_matrix.png ADDED
spam_classifier.pkl → MNBspam_classifier_model.pkl RENAMED
File without changes
SVM_classification_report.png ADDED
SVMconfusion_matrix.png ADDED
SVMspam_classifier.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ccca33faa944372b33275ba2fe09b795c1efaf780ee65c6fb6331e0607e8d12
3
+ size 106635
app.py CHANGED
@@ -5,16 +5,26 @@ import string
5
  import nltk
6
  from nltk.corpus import stopwords
7
 
8
-
9
- # LOAD THE MODEL AND VECTORIZERS
10
- model = joblib.load("spam_classifier.pkl")
11
- vectorizer = joblib.load("tfidf_vectorizer.pkl")
12
-
13
-
14
  nltk.download("stopwords")
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # REDUCE THE INPUT TO ITS MOST BASIC FORM
18
  def preprocess_text(text):
19
  text = text.lower()
20
  text = re.sub(r"\d+", "", text)
@@ -23,14 +33,14 @@ def preprocess_text(text):
23
  words = [word for word in words if word not in stopwords.words("english")]
24
  return " ".join(words)
25
 
 
 
26
 
27
- app, model_eval = st.tabs(["Application", "Model Evaluation"])
28
- # STREAMLIT APP TAB 1
29
  with app:
30
  st.title("πŸ“© Spam Detector App")
31
  st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")
32
-
33
-
34
  user_input = st.text_area("Enter your message:")
35
 
36
  if st.button("Check Spam"):
@@ -40,29 +50,55 @@ with app:
40
  prediction = model.predict(input_vector)
41
 
42
  result = "Spam" if prediction[0] == 1 else "Not Spam"
43
- st.success(f"Prediction: {result}")
44
  else:
45
  st.warning("Please enter a message to check.")
46
 
 
47
  with model_eval:
48
-
49
  st.header("Model Evaluation")
50
- st.write("The Spam Detection model was trained in order to detect if a message is considered a 'Spam' or 'Not Spam'. The dataset was taken from kaggle.")
51
- st.write("dataset by Faisal Qureshi: https://www.kaggle.com/datasets/mfaisalqureshi/spam-email")
52
-
53
- # CONFUSION MATRIX
54
  st.title("Confusion Matrix")
55
- st.write("The confusion matrix displays the actual values or true labels with the predicted values from the model. With this, we can identify the margin of error the model has. Consider the following when understanding the confusion matrix:")
56
- st.write("True Positives (TP): Correctly predicted Spam")
57
- st.write("True Negatives (TN): Correctly predicted Not Spam")
58
- st.write("False Positives (FP): Predicted Spam but it was actually Not Spam (Type I error)")
59
- st.write("False Negatives (FN): Predicted Not Spam but it was actually Spam (Type II error)")
60
- st.image("confusion_matrix.png")
61
-
62
- # EVALUATION MATRICS
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  st.title("Evaluation Metrics")
64
- st.write("The image below represents the Accuracy, F1 score and the classification report of the model")
65
- st.image("classification_report.png")
66
 
 
 
 
 
 
 
 
 
 
 
 
67
 
 
68
 
 
 
 
5
  import nltk
6
  from nltk.corpus import stopwords
7
 
8
+ # Download stopwords
 
 
 
 
 
9
  nltk.download("stopwords")
10
 
11
+ # Sidebar Model Selection
12
+ st.sidebar.title("πŸ” Choose Model")
13
+ model_choice = st.sidebar.radio(
14
+ "Select a model for Spam Detection:",
15
+ ("Naive Bayes", "Logistic Regression", "Support Vector Machine")
16
+ )
17
+
18
+ # Load selected model
19
+ model_paths = {
20
+ "Naive Bayes": "MNBspam_classifier_model.pkl",
21
+ "Logistic Regression": "LRspam_classifier_model.pkl",
22
+ "Support Vector Machine": "SVMspam_classifier.pkl"
23
+ }
24
+ model = joblib.load(model_paths[model_choice])
25
+ vectorizer = joblib.load("tfidf_vectorizer.pkl")
26
 
27
+ # Function to preprocess text
28
  def preprocess_text(text):
29
  text = text.lower()
30
  text = re.sub(r"\d+", "", text)
 
33
  words = [word for word in words if word not in stopwords.words("english")]
34
  return " ".join(words)
35
 
36
+ # Tabs for Application & Model Evaluation
37
+ app, model_eval = st.tabs(["πŸ“© Application", "πŸ“Š Model Evaluation"])
38
 
39
+ # Spam Detector Application
 
40
  with app:
41
  st.title("πŸ“© Spam Detector App")
42
  st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")
43
+
 
44
  user_input = st.text_area("Enter your message:")
45
 
46
  if st.button("Check Spam"):
 
50
  prediction = model.predict(input_vector)
51
 
52
  result = "Spam" if prediction[0] == 1 else "Not Spam"
53
+ st.success(f"Prediction: {result} ({model_choice})")
54
  else:
55
  st.warning("Please enter a message to check.")
56
 
57
+ # Model Evaluation Tab
58
  with model_eval:
 
59
  st.header("Model Evaluation")
60
+ st.write("The Spam Detection model was trained to classify messages as 'Spam' or 'Not Spam'. The dataset was taken from Kaggle.")
61
+ st.write("Dataset by Faisal Qureshi: [Kaggle Link](https://www.kaggle.com/datasets/mfaisalqureshi/spam-email)")
62
+
63
+ # Confusion Matrix
64
  st.title("Confusion Matrix")
65
+ st.write("The confusion matrix displays actual vs. predicted labels. Consider the following when interpreting it:")
66
+ st.write("- **True Positives (TP):** Correctly predicted Spam")
67
+ st.write("- **True Negatives (TN):** Correctly predicted Not Spam")
68
+ st.write("- **False Positives (FP):** Predicted Spam but was actually Not Spam (Type I error)")
69
+ st.write("- **False Negatives (FN):** Predicted Not Spam but was actually Spam (Type II error)")
70
+
71
+ st.header("Naive Bayes Confusion Matrix")
72
+ st.write("The image below represents the Confusion Matrix of the Naive Bayes model.")
73
+ st.image("MNBconfusion_matrix.png")
74
+
75
+ st.header("Logistic Regression Confusion Matrix")
76
+ st.write("The image below represents the Confusion Matrix of the Logistic Regression model.")
77
+ st.image("LRconfusion_matrix.png")
78
+
79
+
80
+ st.header("SVM Confusion Matrix")
81
+ st.write("The image below represents the Confusion Matrix of the SVM model.")
82
+ st.image("SVMconfusion_matrix.png")
83
+
84
+
85
+ # Evaluation Metrics
86
  st.title("Evaluation Metrics")
87
+ st.write("Evaluation metrics help assess the performance of the spam detector.")
 
88
 
89
+ st.header("Naive Bayes Evaluation Metrics")
90
+ st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Naive Bayes model.")
91
+ st.image("MNBclassification_report.png")
92
+
93
+ st.header("Logistic Regression Evaluation Metrics")
94
+ st.write("The image below represents the **Accuracy, F1 score, and classification report** of the Logistic Regression model.")
95
+ st.image("LRclassification_report.png")
96
+
97
+ st.header("SVM Evaluation Metrics")
98
+ st.write("The image below represents the **Accuracy, F1 score, and classification report** of the SVM model.")
99
+ st.image("SVM_classification_report.png")
100
 
101
+ # COMPARISON
102
 
103
+ st.header("Comparison")
104
+ st.write("Based on the confusion matrix and evaluation metrics, we can assume that out of the three classification algorithms chosen, Naive Bayes performs the best using this dataset")
confusion_matrix.png DELETED
Binary file (16.6 kB)
 
main.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
tfidf_vectorizer.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:856e0ba9a758d06ab564a5675d2f538c180786a5aecba0d03b3ed5c98fb10968
3
  size 78711
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b3264f32054f57cdda0912eaec6c6961c77902787d05dfe2255e0d532b5e55
3
  size 78711