# Hosting-page residue captured with this file (Space status: "Sleeping").
# It is not part of the program.
import functools

import gradio as gr
import joblib
import numpy as np
import xgboost as xgb
@functools.lru_cache(maxsize=1)
def _load_artifacts():
    """Load the TF-IDF vectorizer and the XGBoost booster once and reuse them.

    The original code unpickled both files on every button click. Caching the
    pair keeps the first call lazy (a missing file still fails at click time,
    and a failed load is not cached) while later calls are served from memory.

    Returns:
        A ``(vectorizer, booster)`` tuple.
    """
    # NOTE(security): joblib.load unpickles arbitrary objects. These files must
    # be trusted artifacts shipped with the app, never user-supplied uploads.
    vectorizer = joblib.load('tfidf_vectorizer.joblib')
    booster = joblib.load('spam_model.joblib')
    return vectorizer, booster


def _score_email(email_text):
    """Vectorize *email_text* and run the model on it.

    Returns:
        ``(label, confidence, email_tfidf)`` where ``label`` is ``"Spam"`` or
        ``"Not Spam"``, ``confidence`` is a Python float, and ``email_tfidf``
        is the vectorizer output for the single email.
    """
    vectorizer, booster = _load_artifacts()
    # Guard against a None textbox value; transform() needs a string.
    email_tfidf = vectorizer.transform([email_text or ""])
    # The model output is treated as the probability of the "Spam" class
    # (presumably a binary:logistic objective -- confirm against training).
    spam_score = float(booster.predict(xgb.DMatrix(email_tfidf))[0])
    label = "Spam" if spam_score > 0.5 else "Not Spam"
    confidence = max(spam_score, 1 - spam_score)
    return label, confidence, email_tfidf


def _feature_label(key, feature_names):
    """Translate a booster feature key into a vocabulary word.

    A booster trained on an unnamed matrix reports features as ``"f<column>"``.
    Such keys are mapped to ``feature_names[column]``; any other key (already
    a real name, or an index out of range) is returned unchanged.
    """
    column = key[1:]
    if key.startswith("f") and column.isdecimal() and int(column) < len(feature_names):
        return str(feature_names[int(column)])
    return key


def _top_email_words(gain_by_key, feature_names, present_columns, limit=5):
    """Return up to *limit* ``(word, gain)`` pairs for words found in the email.

    Args:
        gain_by_key: mapping of booster feature key to importance value.
        feature_names: vocabulary, indexable by column number.
        present_columns: column indices that are non-zero for this email.
        limit: maximum number of pairs to return.

    Returns:
        Pairs sorted by descending gain, restricted to words in the email.
    """
    words_in_email = {str(feature_names[col]) for col in present_columns}
    scored = (
        (_feature_label(key, feature_names), gain)
        for key, gain in gain_by_key.items()
    )
    # Filter to the email's own words BEFORE truncating; otherwise the global
    # top-N almost never intersects a given email.
    ranked = sorted(
        (pair for pair in scored if pair[0] in words_in_email),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:limit]


def classify_email(email_text):
    """Classify an email as spam or not.

    Args:
        email_text: raw email text from the UI (``None`` is treated as empty).

    Returns:
        A one-entry dict ``{label: confidence}``, the shape ``gr.Label``
        accepts for showing a class with its confidence.
    """
    label, confidence, _ = _score_email(email_text)
    return {label: confidence}


def analyze_email(email_text):
    """Classify an email and list the most influential words it contains.

    Args:
        email_text: raw email text from the UI (``None`` is treated as empty).

    Returns:
        A multi-line string: the classification with its confidence, followed
        by up to five words from this email ranked by the model's gain
        importance.
    """
    label, confidence, email_tfidf = _score_email(email_text)
    vectorizer, booster = _load_artifacts()

    feature_names = vectorizer.get_feature_names_out()
    gain_by_key = booster.get_score(importance_type='gain')
    # Column indices of the vocabulary terms that occur in this email.
    present_columns = email_tfidf.nonzero()[1]
    top_words = _top_email_words(gain_by_key, feature_names, present_columns)

    lines = [
        f"Classification: {label} (Confidence: {confidence:.2%})",
        "",
        "Top 5 influential words:",
    ]
    if top_words:
        lines.extend(f"- {word}: {gain:.2f}" for word, gain in top_words)
    else:
        lines.append("- (no words from this email are used by the model)")
    return "\n".join(lines) + "\n"
# Sample emails offered as one-click inputs below the form.
examples = [
    ["Get fat quick! Buy our cheese burger now!"],
    ["Hi Ajibola, let's go out on a date tonight"],
    ["Congratulations! You've won a free iPhone. Click here to claim."],
    ["Please find attached the report for Q2 sales figures."],
]

# Build the page: heading, input/output columns, examples, event wiring, help text.
with gr.Blocks(css="footer {visibility: hidden}") as iface:
    # NOTE(review): the "π" in the heading looks like a mis-encoded emoji -- confirm
    # the intended character against the original file.
    gr.Markdown(
        value="""
        # π Spam Email Classifier
        Using Machine Learning to detect spam emails with high accuracy!
        """
    )

    with gr.Row():
        # Left (wider) column: the email text and the two action buttons.
        with gr.Column(scale=2):
            email_input = gr.Textbox(label="Enter email text", lines=5)
            with gr.Row():
                classify_btn = gr.Button(value="Classify")
                analyze_btn = gr.Button(value="Detailed Analysis")
        # Right column: one output widget per button.
        with gr.Column(scale=1):
            label_output = gr.Label(label="Classification")
            analysis_output = gr.Textbox(lines=8, label="Detailed Analysis")

    gr.Examples(examples=examples, inputs=email_input)

    # Each button sends the textbox content to its handler and shows the result.
    classify_btn.click(fn=classify_email, inputs=email_input, outputs=label_output)
    analyze_btn.click(fn=analyze_email, inputs=email_input, outputs=analysis_output)

    gr.Markdown(
        value="""
        ### How it works
        This classifier uses an XGBoost model trained on a large dataset of over 190,000 emails.
        The model achieved a 98% accuracy on the training data and 94% accuracy on the test data.
        It analyzes the content and structure of the email to determine if it's spam or not.
        ### Tips for use
        - Enter the full text of the email for best results
        - The 'Detailed Analysis' shows the top words influencing the classification
        - Confidence score indicates how sure the model is about its prediction
        """
    )

# Start the web server for the app.
iface.launch()