Spaces:
Sleeping
Sleeping
Commit
·
404ac46
1
Parent(s):
6074481
Added evaluation tab
Browse files
- app.py +42 -17
- classification_report.png +0 -0
- confusion_matrix.png +0 -0
- main.ipynb +55 -22
- metrics.txt +12 -0
- tfidf_vectorizer.pkl +1 -1
app.py
CHANGED
@@ -24,20 +24,45 @@ def preprocess_text(text):
|
|
24 |
return " ".join(words)
|
25 |
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
return " ".join(words)
|
25 |
|
26 |
|
27 |
+
app, model_eval = st.tabs(["Application", "Model Evaluation"])
|
28 |
+
# STREAMLIT APP TAB 1
|
29 |
+
with app:
|
30 |
+
st.title("📩 Spam Detector App")
|
31 |
+
st.write("Enter a message below to check if it's **Spam** or **Not Spam**.")
|
32 |
+
|
33 |
+
|
34 |
+
user_input = st.text_area("Enter your message:")
|
35 |
+
|
36 |
+
if st.button("Check Spam"):
|
37 |
+
if user_input.strip():
|
38 |
+
processed_input = preprocess_text(user_input)
|
39 |
+
input_vector = vectorizer.transform([processed_input])
|
40 |
+
prediction = model.predict(input_vector)
|
41 |
+
|
42 |
+
result = "Spam" if prediction[0] == 1 else "Not Spam"
|
43 |
+
st.success(f"Prediction: {result}")
|
44 |
+
else:
|
45 |
+
st.warning("Please enter a message to check.")
|
46 |
+
|
47 |
+
with model_eval:
|
48 |
+
|
49 |
+
st.header("Model Evaluation")
|
50 |
+
st.write("The Spam Detection model was trained in order to detect if a message is considered a 'Spam' or 'Not Spam'. The dataset was taken from kaggle.")
|
51 |
+
st.write("dataset by Faisal Qureshi: https://www.kaggle.com/datasets/mfaisalqureshi/spam-email")
|
52 |
+
|
53 |
+
# CONFUSION MATRIX
|
54 |
+
st.title("Confusion Matrix")
|
55 |
+
st.write("The confusion matrix displays the actual values or true labels with the predicted values from the model. With this, we can identify the margin of error the model has. Consider the following when understanding the confusion matrix:")
|
56 |
+
st.write("True Positives (TP): Correctly predicted Spam")
|
57 |
+
st.write("True Negatives (TN): Correctly predicted Not Spam")
|
58 |
+
st.write("False Positives (FP): Predicted Spam but it was actually Not Spam (Type I error)")
|
59 |
+
st.write("False Negatives (FN): Predicted Not Spam but it was actually Spam (Type II error)")
|
60 |
+
st.image("confusion_matrix.png")
|
61 |
+
|
62 |
+
# EVALUATION METRICS
|
63 |
+
st.title("Evaluation Metrics")
|
64 |
+
st.write("The image below represents the Accuracy, F1 score and the classification report of the model")
|
65 |
+
st.image("classification_report.png")
|
66 |
+
|
67 |
+
|
68 |
+
|
classification_report.png
ADDED
![]() |
confusion_matrix.png
ADDED
![]() |
main.ipynb
CHANGED
@@ -13,7 +13,7 @@
|
|
13 |
},
|
14 |
{
|
15 |
"cell_type": "code",
|
16 |
-
"execution_count":
|
17 |
"metadata": {},
|
18 |
"outputs": [
|
19 |
{
|
@@ -56,7 +56,7 @@
|
|
56 |
},
|
57 |
{
|
58 |
"cell_type": "code",
|
59 |
-
"execution_count":
|
60 |
"metadata": {},
|
61 |
"outputs": [],
|
62 |
"source": [
|
@@ -65,7 +65,7 @@
|
|
65 |
},
|
66 |
{
|
67 |
"cell_type": "code",
|
68 |
-
"execution_count":
|
69 |
"metadata": {},
|
70 |
"outputs": [],
|
71 |
"source": [
|
@@ -75,7 +75,7 @@
|
|
75 |
},
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
-
"execution_count":
|
79 |
"metadata": {},
|
80 |
"outputs": [
|
81 |
{
|
@@ -100,7 +100,7 @@
|
|
100 |
},
|
101 |
{
|
102 |
"cell_type": "code",
|
103 |
-
"execution_count":
|
104 |
"metadata": {},
|
105 |
"outputs": [
|
106 |
{
|
@@ -119,7 +119,7 @@
|
|
119 |
},
|
120 |
{
|
121 |
"cell_type": "code",
|
122 |
-
"execution_count":
|
123 |
"metadata": {},
|
124 |
"outputs": [
|
125 |
{
|
@@ -146,7 +146,7 @@
|
|
146 |
},
|
147 |
{
|
148 |
"cell_type": "code",
|
149 |
-
"execution_count":
|
150 |
"metadata": {},
|
151 |
"outputs": [],
|
152 |
"source": [
|
@@ -156,7 +156,7 @@
|
|
156 |
},
|
157 |
{
|
158 |
"cell_type": "code",
|
159 |
-
"execution_count":
|
160 |
"metadata": {},
|
161 |
"outputs": [],
|
162 |
"source": [
|
@@ -166,7 +166,7 @@
|
|
166 |
},
|
167 |
{
|
168 |
"cell_type": "code",
|
169 |
-
"execution_count":
|
170 |
"metadata": {},
|
171 |
"outputs": [],
|
172 |
"source": [
|
@@ -176,7 +176,7 @@
|
|
176 |
},
|
177 |
{
|
178 |
"cell_type": "code",
|
179 |
-
"execution_count":
|
180 |
"metadata": {},
|
181 |
"outputs": [],
|
182 |
"source": [
|
@@ -186,7 +186,7 @@
|
|
186 |
},
|
187 |
{
|
188 |
"cell_type": "code",
|
189 |
-
"execution_count":
|
190 |
"metadata": {},
|
191 |
"outputs": [
|
192 |
{
|
@@ -214,7 +214,7 @@
|
|
214 |
},
|
215 |
{
|
216 |
"cell_type": "code",
|
217 |
-
"execution_count":
|
218 |
"metadata": {},
|
219 |
"outputs": [],
|
220 |
"source": [
|
@@ -230,7 +230,7 @@
|
|
230 |
},
|
231 |
{
|
232 |
"cell_type": "code",
|
233 |
-
"execution_count":
|
234 |
"metadata": {},
|
235 |
"outputs": [],
|
236 |
"source": [
|
@@ -246,7 +246,7 @@
|
|
246 |
},
|
247 |
{
|
248 |
"cell_type": "code",
|
249 |
-
"execution_count":
|
250 |
"metadata": {},
|
251 |
"outputs": [],
|
252 |
"source": [
|
@@ -272,7 +272,7 @@
|
|
272 |
},
|
273 |
{
|
274 |
"cell_type": "code",
|
275 |
-
"execution_count":
|
276 |
"metadata": {},
|
277 |
"outputs": [],
|
278 |
"source": [
|
@@ -283,7 +283,7 @@
|
|
283 |
},
|
284 |
{
|
285 |
"cell_type": "code",
|
286 |
-
"execution_count":
|
287 |
"metadata": {},
|
288 |
"outputs": [
|
289 |
{
|
@@ -699,7 +699,7 @@
|
|
699 |
"MultinomialNB()"
|
700 |
]
|
701 |
},
|
702 |
-
"execution_count":
|
703 |
"metadata": {},
|
704 |
"output_type": "execute_result"
|
705 |
}
|
@@ -712,7 +712,7 @@
|
|
712 |
},
|
713 |
{
|
714 |
"cell_type": "code",
|
715 |
-
"execution_count":
|
716 |
"metadata": {},
|
717 |
"outputs": [],
|
718 |
"source": [
|
@@ -731,7 +731,7 @@
|
|
731 |
},
|
732 |
{
|
733 |
"cell_type": "code",
|
734 |
-
"execution_count":
|
735 |
"metadata": {},
|
736 |
"outputs": [
|
737 |
{
|
@@ -757,6 +757,8 @@
|
|
757 |
"source": [
|
758 |
"accuracy = accuracy_score(y_test, y_pred)\n",
|
759 |
"f1 = f1_score(y_test, y_pred)\n",
|
|
|
|
|
760 |
"print(f\"Accuracy: {accuracy:.4f}\")\n",
|
761 |
"print(f\"F1 Score: {f1:.4f}\")\n",
|
762 |
"print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred))\n"
|
@@ -764,7 +766,35 @@
|
|
764 |
},
|
765 |
{
|
766 |
"cell_type": "code",
|
767 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
768 |
"metadata": {},
|
769 |
"outputs": [
|
770 |
{
|
@@ -785,12 +815,15 @@
|
|
785 |
"plt.xlabel(\"Predicted\")\n",
|
786 |
"plt.ylabel(\"Actual\")\n",
|
787 |
"plt.title(\"Confusion Matrix\")\n",
|
788 |
-
"plt.
|
|
|
|
|
|
|
789 |
]
|
790 |
},
|
791 |
{
|
792 |
"cell_type": "code",
|
793 |
-
"execution_count":
|
794 |
"metadata": {},
|
795 |
"outputs": [
|
796 |
{
|
|
|
13 |
},
|
14 |
{
|
15 |
"cell_type": "code",
|
16 |
+
"execution_count": 22,
|
17 |
"metadata": {},
|
18 |
"outputs": [
|
19 |
{
|
|
|
56 |
},
|
57 |
{
|
58 |
"cell_type": "code",
|
59 |
+
"execution_count": 23,
|
60 |
"metadata": {},
|
61 |
"outputs": [],
|
62 |
"source": [
|
|
|
65 |
},
|
66 |
{
|
67 |
"cell_type": "code",
|
68 |
+
"execution_count": 24,
|
69 |
"metadata": {},
|
70 |
"outputs": [],
|
71 |
"source": [
|
|
|
75 |
},
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
+
"execution_count": 25,
|
79 |
"metadata": {},
|
80 |
"outputs": [
|
81 |
{
|
|
|
100 |
},
|
101 |
{
|
102 |
"cell_type": "code",
|
103 |
+
"execution_count": 26,
|
104 |
"metadata": {},
|
105 |
"outputs": [
|
106 |
{
|
|
|
119 |
},
|
120 |
{
|
121 |
"cell_type": "code",
|
122 |
+
"execution_count": 27,
|
123 |
"metadata": {},
|
124 |
"outputs": [
|
125 |
{
|
|
|
146 |
},
|
147 |
{
|
148 |
"cell_type": "code",
|
149 |
+
"execution_count": 28,
|
150 |
"metadata": {},
|
151 |
"outputs": [],
|
152 |
"source": [
|
|
|
156 |
},
|
157 |
{
|
158 |
"cell_type": "code",
|
159 |
+
"execution_count": 29,
|
160 |
"metadata": {},
|
161 |
"outputs": [],
|
162 |
"source": [
|
|
|
166 |
},
|
167 |
{
|
168 |
"cell_type": "code",
|
169 |
+
"execution_count": 30,
|
170 |
"metadata": {},
|
171 |
"outputs": [],
|
172 |
"source": [
|
|
|
176 |
},
|
177 |
{
|
178 |
"cell_type": "code",
|
179 |
+
"execution_count": 31,
|
180 |
"metadata": {},
|
181 |
"outputs": [],
|
182 |
"source": [
|
|
|
186 |
},
|
187 |
{
|
188 |
"cell_type": "code",
|
189 |
+
"execution_count": 32,
|
190 |
"metadata": {},
|
191 |
"outputs": [
|
192 |
{
|
|
|
214 |
},
|
215 |
{
|
216 |
"cell_type": "code",
|
217 |
+
"execution_count": 33,
|
218 |
"metadata": {},
|
219 |
"outputs": [],
|
220 |
"source": [
|
|
|
230 |
},
|
231 |
{
|
232 |
"cell_type": "code",
|
233 |
+
"execution_count": 34,
|
234 |
"metadata": {},
|
235 |
"outputs": [],
|
236 |
"source": [
|
|
|
246 |
},
|
247 |
{
|
248 |
"cell_type": "code",
|
249 |
+
"execution_count": 35,
|
250 |
"metadata": {},
|
251 |
"outputs": [],
|
252 |
"source": [
|
|
|
272 |
},
|
273 |
{
|
274 |
"cell_type": "code",
|
275 |
+
"execution_count": 36,
|
276 |
"metadata": {},
|
277 |
"outputs": [],
|
278 |
"source": [
|
|
|
283 |
},
|
284 |
{
|
285 |
"cell_type": "code",
|
286 |
+
"execution_count": 37,
|
287 |
"metadata": {},
|
288 |
"outputs": [
|
289 |
{
|
|
|
699 |
"MultinomialNB()"
|
700 |
]
|
701 |
},
|
702 |
+
"execution_count": 37,
|
703 |
"metadata": {},
|
704 |
"output_type": "execute_result"
|
705 |
}
|
|
|
712 |
},
|
713 |
{
|
714 |
"cell_type": "code",
|
715 |
+
"execution_count": 38,
|
716 |
"metadata": {},
|
717 |
"outputs": [],
|
718 |
"source": [
|
|
|
731 |
},
|
732 |
{
|
733 |
"cell_type": "code",
|
734 |
+
"execution_count": 39,
|
735 |
"metadata": {},
|
736 |
"outputs": [
|
737 |
{
|
|
|
757 |
"source": [
|
758 |
"accuracy = accuracy_score(y_test, y_pred)\n",
|
759 |
"f1 = f1_score(y_test, y_pred)\n",
|
760 |
+
"report = classification_report(y_test, y_pred)\n",
|
761 |
+
"\n",
|
762 |
"print(f\"Accuracy: {accuracy:.4f}\")\n",
|
763 |
"print(f\"F1 Score: {f1:.4f}\")\n",
|
764 |
"print(\"\\nClassification Report:\\n\", classification_report(y_test, y_pred))\n"
|
|
|
766 |
},
|
767 |
{
|
768 |
"cell_type": "code",
|
769 |
+
"execution_count": null,
|
770 |
+
"metadata": {},
|
771 |
+
"outputs": [
|
772 |
+
{
|
773 |
+
"data": {
|
774 |
+
"image/png": "",
|
775 |
+
"text/plain": [
|
776 |
+
"<Figure size 800x600 with 1 Axes>"
|
777 |
+
]
|
778 |
+
},
|
779 |
+
"metadata": {},
|
780 |
+
"output_type": "display_data"
|
781 |
+
}
|
782 |
+
],
|
783 |
+
"source": [
|
784 |
+
"text_output = f\"Accuracy: {accuracy:.4f}\\nF1 Score: {f1:.4f}\\n\\nClassification Report:\\n{report}\"\n",
|
785 |
+
"\n",
|
786 |
+
"plt.figure(figsize=(8, 6))\n",
|
787 |
+
"plt.text(0.01, 0.99, text_output, fontsize=12, ha='left', va='top', family=\"monospace\")\n",
|
788 |
+
"# Hide axes\n",
|
789 |
+
"plt.axis(\"off\")\n",
|
790 |
+
"\n",
|
791 |
+
"plt.savefig(\"classification_report.png\", bbox_inches=\"tight\", dpi=300)\n",
|
792 |
+
"plt.show()"
|
793 |
+
]
|
794 |
+
},
|
795 |
+
{
|
796 |
+
"cell_type": "code",
|
797 |
+
"execution_count": 41,
|
798 |
"metadata": {},
|
799 |
"outputs": [
|
800 |
{
|
|
|
815 |
"plt.xlabel(\"Predicted\")\n",
|
816 |
"plt.ylabel(\"Actual\")\n",
|
817 |
"plt.title(\"Confusion Matrix\")\n",
|
818 |
+
"plt.savefig(\"confusion_matrix.png\")\n",
|
819 |
+
"plt.show()\n",
|
820 |
+
"# Save the figure\n",
|
821 |
+
"plt.close() "
|
822 |
]
|
823 |
},
|
824 |
{
|
825 |
"cell_type": "code",
|
826 |
+
"execution_count": 42,
|
827 |
"metadata": {},
|
828 |
"outputs": [
|
829 |
{
|
metrics.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Accuracy: 0.9398
|
2 |
+
F1 Score: 0.9375
|
3 |
+
|
4 |
+
Classification Report:
|
5 |
+
precision recall f1-score support
|
6 |
+
|
7 |
+
0 0.91 0.97 0.94 150
|
8 |
+
1 0.97 0.91 0.94 149
|
9 |
+
|
10 |
+
accuracy 0.94 299
|
11 |
+
macro avg 0.94 0.94 0.94 299
|
12 |
+
weighted avg 0.94 0.94 0.94 299
|
tfidf_vectorizer.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 78711
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:856e0ba9a758d06ab564a5675d2f538c180786a5aecba0d03b3ed5c98fb10968
|
3 |
size 78711
|