Spaces:

Tobidx
/

Spam-email-classifier

Sleeping

App Files Files Community

Spam-email-classifier / app.py

Tobidx

Rename spam_app.py to app.py

baea549 verified 10 months ago

raw

history blame

3.19 kB

	import functools

	import gradio as gr
	import joblib
	import numpy as np
	import xgboost as xgb

	@functools.lru_cache(maxsize=1)
	def _load_artifacts():
	    """Load the fitted TF-IDF vectorizer and the spam model once per process.

	    The first call reads both joblib files from the working directory; later
	    calls return the same objects, so button clicks no longer hit the disk.

	    Returns:
	        A ``(tfidf, model)`` tuple.
	    """
	    # NOTE(review): joblib.load unpickles; only safe because these files
	    # ship with the app. Never point this at user-supplied paths.
	    # NOTE(review): the model object is now shared between requests;
	    # confirm the installed XGBoost's predict is thread-safe for this booster.
	    tfidf = joblib.load('tfidf_vectorizer.joblib')
	    model = joblib.load('spam_model.joblib')
	    return tfidf, model


	def _predict(email_text):
	    """Vectorize ``email_text`` and score it with the spam model.

	    Returns:
	        A ``(label, confidence, email_tfidf)`` tuple: ``label`` is "Spam" when
	        the model output exceeds 0.5 and "Not Spam" otherwise, ``confidence``
	        is the larger of the score and its complement (so it assumes the model
	        emits a value in [0, 1]), and ``email_tfidf`` is the one-row TF-IDF
	        matrix that was scored.
	    """
	    tfidf, model = _load_artifacts()
	    email_tfidf = tfidf.transform([email_text])
	    spam_score = float(model.predict(xgb.DMatrix(email_tfidf))[0])
	    label = "Spam" if spam_score > 0.5 else "Not Spam"
	    confidence = max(spam_score, 1 - spam_score)
	    return label, confidence, email_tfidf


	def _feature_column(feature_key, tfidf):
	    """Map a key from ``Booster.get_score()`` to a TF-IDF column index.

	    A booster trained without feature names reports positional keys of the
	    form ``f<index>``; otherwise the key is looked up as a word in the
	    vectorizer's vocabulary. Returns ``None`` when the key cannot be mapped.
	    """
	    if feature_key[:1] == "f" and feature_key[1:].isdigit():
	        return int(feature_key[1:])
	    return tfidf.vocabulary_.get(feature_key)


	def _influential_words(email_tfidf, limit=5):
	    """Return up to ``limit`` ``(word, gain)`` pairs for words in the email.

	    Only words that actually occur in the email (non-zero TF-IDF columns) are
	    considered; they are ranked by the model's gain importance, highest first.
	    Gain is a property of the trained model as a whole, not a per-email
	    contribution.
	    """
	    tfidf, model = _load_artifacts()
	    vocabulary = tfidf.get_feature_names_out()
	    # Column indices of the vocabulary terms present in this single-row matrix.
	    present_columns = set(email_tfidf.nonzero()[1].tolist())

	    ranked = []
	    for feature_key, gain in model.get_score(importance_type='gain').items():
	        column = _feature_column(feature_key, tfidf)
	        if column in present_columns:
	            ranked.append((vocabulary[column], gain))
	    ranked.sort(key=lambda pair: pair[1], reverse=True)
	    return ranked[:limit]


	def classify_email(email_text):
	    """Classify an email as spam or not spam.

	    Args:
	        email_text: Raw email text.

	    Returns:
	        A single-entry dict ``{label: confidence}`` with ``label`` being
	        "Spam" or "Not Spam" and ``confidence`` a float, in the shape the
	        ``gr.Label`` output consumes.
	    """
	    label, confidence, _ = _predict(email_text)
	    return {label: confidence}


	def analyze_email(email_text):
	    """Build a plain-text report: the classification plus influential words.

	    Args:
	        email_text: Raw email text.

	    Returns:
	        A multi-line string with the label, the confidence as a percentage,
	        and up to five words from the email ranked by the model's gain
	        importance.
	    """
	    label, confidence, email_tfidf = _predict(email_text)
	    top_words = _influential_words(email_tfidf, limit=5)

	    lines = [
	        f"Classification: {label} (Confidence: {confidence:.2%})",
	        "",
	        "Top 5 influential words:",
	    ]
	    if top_words:
	        lines.extend(f"- {word}: {gain:.2f}" for word, gain in top_words)
	    else:
	        # Make the empty case explicit instead of showing a bare heading.
	        lines.append("- (none of the model's weighted words appear in this email)")
	    return "\n".join(lines) + "\n"

	# Static Markdown rendered above and below the interactive controls.
	_HEADER_MD = """
	# 🚀 Spam Email Classifier

	Using Machine Learning to detect spam emails with high accuracy!
	"""

	_ABOUT_MD = """
	### How it works
	This classifier uses an XGBoost model trained on a large dataset of over 190,000 emails.
	The model achieved a 98% accuracy on the training data and 94% accuracy on the test data.
	It analyzes the content and structure of the email to determine if it's spam or not.

	### Tips for use
	- Enter the full text of the email for best results
	- The 'Detailed Analysis' shows the top words influencing the classification
	- Confidence score indicates how sure the model is about its prediction
	"""

	# Build the page: header, input column with two buttons, output column,
	# clickable examples, then the explanatory text. The footer is hidden via CSS.
	with gr.Blocks(css="footer {visibility: hidden}") as iface:
	    gr.Markdown(_HEADER_MD)

	    with gr.Row():
	        # Left (wider) column: the email text and the two action buttons.
	        with gr.Column(scale=2):
	            email_input = gr.Textbox(lines=5, label="Enter email text")
	            with gr.Row():
	                classify_btn = gr.Button("Classify")
	                analyze_btn = gr.Button("Detailed Analysis")
	        # Right column: the label widget and the free-text analysis.
	        with gr.Column(scale=1):
	            label_output = gr.Label(label="Classification")
	            analysis_output = gr.Textbox(label="Detailed Analysis", lines=8)

	    # One-click sample inputs that populate the email textbox.
	    examples = [
	        ["Get fat quick! Buy our cheese burger now!"],
	        ["Hi Ajibola, let's go out on a date tonight"],
	        ["Congratulations! You've won a free iPhone. Click here to claim."],
	        ["Please find attached the report for Q2 sales figures."],
	    ]
	    gr.Examples(examples=examples, inputs=email_input)

	    # Wire each button to its handler and its output component.
	    classify_btn.click(fn=classify_email, inputs=email_input, outputs=label_output)
	    analyze_btn.click(fn=analyze_email, inputs=email_input, outputs=analysis_output)

	    gr.Markdown(_ABOUT_MD)

	# Start the web server for the interface.
	iface.launch()