Tobidx committed on
Commit
dda9786
·
verified ·
1 Parent(s): 4fe4aad

Upload 4 files

Browse files
spam_app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import xgboost as xgb
4
+ import numpy as np
5
+
6
# Fitted artifacts are loaded lazily on the first request and kept here so
# that later button clicks do not re-read both files from disk.
_ARTIFACT_CACHE = {}


def _load_artifacts():
    """Return the ``(tfidf_vectorizer, model)`` pair, loading it on first use.

    NOTE(review): ``joblib.load`` unpickles the files, so the two ``.joblib``
    artifacts must come from a trusted source.
    """
    if not _ARTIFACT_CACHE:
        # Load both before publishing either, so a failed second load does
        # not leave a half-filled cache behind.
        tfidf = joblib.load('tfidf_vectorizer.joblib')
        model = joblib.load('spam_model.joblib')
        _ARTIFACT_CACHE['tfidf'] = tfidf
        _ARTIFACT_CACHE['model'] = model
    return _ARTIFACT_CACHE['tfidf'], _ARTIFACT_CACHE['model']


def _label_and_confidence(prediction):
    """Turn the model output for one email into ``(label, confidence)``.

    A value strictly above 0.5 is labelled "Spam"; the confidence is the
    larger of ``prediction`` and ``1 - prediction``.
    """
    label = "Spam" if prediction > 0.5 else "Not Spam"
    confidence = max(prediction, 1 - prediction)
    return label, confidence


def _rank_words_in_email(gain_by_key, vocabulary, present_columns,
                         booster_feature_names=None, limit=5):
    """Return up to *limit* ``(word, gain)`` pairs for words found in the email.

    Args:
        gain_by_key: Mapping returned by ``Booster.get_score``.
        vocabulary: Sequence mapping a TF-IDF column index to its term.
        present_columns: Column indices that are non-zero for this email.
        booster_feature_names: Feature names stored on the booster, or
            ``None`` when it was trained on unnamed data; in that case the
            importance keys have the form ``"f<column index>"``.
        limit: Maximum number of pairs to return.

    Returns:
        A list of ``(word, gain)`` tuples sorted by descending gain.
    """
    if booster_feature_names is None:
        def column_of(key):
            # Unnamed training data: keys look like "f123".
            if key[:1] == "f" and key[1:].isdigit():
                return int(key[1:])
            return None
    else:
        column_of = {
            name: column for column, name in enumerate(booster_feature_names)
        }.get

    scored = []
    for key, gain in gain_by_key.items():
        column = column_of(key)
        if column is None or column >= len(vocabulary):
            continue
        # Filter to this email's words *before* truncating, so the list is
        # not emptied by globally important words that the email lacks.
        if column in present_columns:
            scored.append((vocabulary[column], gain))
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:limit]


def _predict(email_text):
    """Vectorize *email_text* and run the model on it.

    Returns:
        ``(tfidf, model, email_tfidf, prediction)`` where ``email_tfidf`` is
        the transformed single-row matrix and ``prediction`` is the first
        element of the model output for it.
    """
    tfidf, model = _load_artifacts()
    email_tfidf = tfidf.transform([email_text])
    prediction = model.predict(xgb.DMatrix(email_tfidf))[0]
    return tfidf, model, email_tfidf, prediction


def classify_email(email_text):
    """Classify one email for the Gradio ``Label`` output.

    Args:
        email_text: Raw text of the email.

    Returns:
        A one-entry dict ``{label: confidence}`` where the label is "Spam"
        or "Not Spam" and the confidence is a Python float.
    """
    _, _, _, prediction = _predict(email_text)
    label, confidence = _label_and_confidence(prediction)
    return {label: float(confidence)}


def analyze_email(email_text):
    """Classify one email and list the model's most important words in it.

    Args:
        email_text: Raw text of the email.

    Returns:
        A multi-line string: the classification with its confidence, then
        up to five words from this email ranked by the model's "gain"
        importance.
    """
    tfidf, model, email_tfidf, prediction = _predict(email_text)
    label, confidence = _label_and_confidence(prediction)

    # Columns that are non-zero in the row are the vocabulary terms that
    # actually occur in this email.
    present_columns = {int(column) for column in email_tfidf.nonzero()[1]}
    top_features = _rank_words_in_email(
        model.get_score(importance_type='gain'),
        tfidf.get_feature_names_out(),
        present_columns,
        booster_feature_names=getattr(model, "feature_names", None),
    )

    parts = [
        f"Classification: {label} (Confidence: {confidence:.2%})\n\n",
        "Top 5 influential words:\n",
    ]
    parts.extend(
        f"- {feature}: {importance:.2f}\n"
        for feature, importance in top_features
    )
    return "".join(parts)
37
+
38
# Sample emails handed to gr.Examples below; each inner list holds the value
# for the single input component.
examples = [
    ["Get fat quick! Buy our cheese burger now!"],
    ["Hi Ajibola, let's go out on a date tonight"],
    ["Congratulations! You've won a free iPhone. Click here to claim."],
    ["Please find attached the report for Q2 sales figures."]
]

# Build the Gradio Blocks UI; the custom CSS hides the page footer.
with gr.Blocks(css="footer {visibility: hidden}") as iface:
    # Page title and tagline.
    gr.Markdown(
        """
        # 🚀 Spam Email Classifier

        Using Machine Learning to detect spam emails with high accuracy!
        """
    )

    with gr.Row():
        # Left column: the email text box and the two action buttons.
        with gr.Column(scale=2):
            email_input = gr.Textbox(lines=5, label="Enter email text")
            with gr.Row():
                classify_btn = gr.Button("Classify")
                analyze_btn = gr.Button("Detailed Analysis")
        # Right column: one output widget per button.
        with gr.Column(scale=1):
            label_output = gr.Label(label="Classification")
            analysis_output = gr.Textbox(label="Detailed Analysis", lines=8)

    gr.Examples(examples=examples, inputs=email_input)

    # Wire each button to its handler and output component.
    classify_btn.click(
        fn=classify_email,
        inputs=email_input,
        outputs=label_output,
    )
    analyze_btn.click(
        fn=analyze_email,
        inputs=email_input,
        outputs=analysis_output,
    )

    # Explanatory text shown below the controls.
    gr.Markdown(
        """
        ### How it works
        This classifier uses an XGBoost model trained on a large dataset of over 190,000 emails.
        The model achieved a 98% accuracy on the training data and 94% accuracy on the test data.
        It analyzes the content and structure of the email to determine if it's spam or not.

        ### Tips for use
        - Enter the full text of the email for best results
        - The 'Detailed Analysis' shows the top words influencing the classification
        - Confidence score indicates how sure the model is about its prediction
        """
    )

# Start the app when the module is executed.
iface.launch()
spam_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0c80f4df9fd5df5d070bcaf91424cdfd885d8270e5d2bc417753df1a0ab4b9
3
+ size 295684
spam_requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ joblib
3
+ xgboost
4
+ scikit-learn
5
+ numpy
tfidf_vectorizer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0176aadd7ba5b4b3c4841a6dfabfcc04b85e17ed59325905d6bfbf02e6ec92e8
3
+ size 2326758