Spaces:
Sleeping
Sleeping
File size: 2,806 Bytes
e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 e9ab3e1 1d64e70 a94a43d c01c8de 1d64e70 c01c8de 1d64e70 e9ab3e1 1d64e70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import streamlit as st
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import threading
# Load the labelled messages from the CSV (path is relative to the working directory).
df = pd.read_csv(r"spam.csv")

# Page heading for the Streamlit UI.
st.title("Identifying Spam and Ham Emails")

# Features are the message texts; targets are their category labels.
x, y = df["Message"], df["Category"]

# Bag of Words (BoW): one column of term counts per vocabulary word,
# with English stop words dropped by the vectorizer.
bow = CountVectorizer(stop_words="english")
term_counts = bow.fit_transform(x)
final_data = pd.DataFrame(
    term_counts.toarray(),
    columns=bow.get_feature_names_out(),
)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    final_data,
    y,
    test_size=0.2,
    random_state=20,
)

# Candidate classifiers, keyed by the label shown in the selection widget.
models = {
    "Naive Bayes": MultinomialNB(),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "SVM": SVC(),
}

# Let the user pick one of the classifiers by name.
model_choice = st.selectbox("Choose a Classification Algorithm", list(models))

# Fit the chosen classifier (fit returns the estimator itself) and score it
# on the held-out rows.
obj = models[model_choice].fit(x_train, y_train)
y_pred = obj.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the held-out accuracy only when the user asks for it.
show_accuracy = st.button("Show Accuracy")
if show_accuracy:
    st.write(f"Accuracy of {model_choice}: {accuracy:.4f}")

# Free-text box holding the email the user wants classified.
email_input = st.text_input("Enter an Email for Prediction")
def predict_email(email):
    """Classify one email and write the predicted label to the Streamlit page.

    Args:
        email: Text of the email to classify.
    """
    # The classifier was fitted on a DataFrame whose columns are the
    # vocabulary terms.  Predicting on a bare ndarray makes scikit-learn warn
    # about missing feature names on every call, so rebuild the same column
    # layout for the single-row input.
    features = pd.DataFrame(
        bow.transform([email]).toarray(),
        columns=bow.get_feature_names_out(),
    )
    prediction = obj.predict(features)[0]
    st.write(f"Prediction: {prediction}")
# When the button is pressed, classify the entered text, or ask for input
# if the text box was left empty.
if st.button("Predict Email"):
    if not email_input:
        st.write(":red[Please enter an email to classify]")
    else:
        predict_email(email_input)
# FastAPI app to handle GET requests
app = FastAPI()


# The route path needs its leading slash: request paths always begin with "/",
# so "predict/" could not serve the intended URL /predict/?email=...
@app.get("/predict/")
def predict_spam(email: str):
    """Predict the category of an email supplied as a query parameter.

    Query parameter:
        email (str): The email text to be classified.

    Returns:
        JSONResponse: ``{"prediction": <label>}`` where the label is the
        value predicted by the currently fitted classifier.
    """
    # Match the column layout the classifier was fitted on (a DataFrame with
    # one named column per vocabulary term) to avoid scikit-learn's
    # feature-name mismatch warning.
    features = pd.DataFrame(
        bow.transform([email]).toarray(),
        columns=bow.get_feature_names_out(),
    )
    prediction = obj.predict(features)[0]
    # NumPy scalar labels (e.g. integer classes) are not JSON serializable;
    # convert them to the equivalent native Python value.  Plain ``str``
    # labels have no ``item`` attribute and pass through unchanged.
    if hasattr(prediction, "item"):
        prediction = prediction.item()
    return JSONResponse(content={"prediction": prediction})
# Thread target that serves the FastAPI app so it can run alongside Streamlit
def run_api():
    """Serve ``app`` with uvicorn on host 0.0.0.0, port 8000."""
    # Imported here rather than at the top of the file, as in the original.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)
# Start FastAPI in a separate thread.
# Streamlit re-executes this script on every widget interaction, so starting
# the thread unconditionally would spawn another server thread per rerun,
# each trying to bind port 8000 again.  Reuse the live thread when there is one.
_API_THREAD_NAME = "spam-api-server"
api_thread = next(
    (t for t in threading.enumerate() if t.name == _API_THREAD_NAME),
    None,
)
if api_thread is None:
    api_thread = threading.Thread(
        target=run_api,
        name=_API_THREAD_NAME,
        daemon=True,
    )
    api_thread.start()
# NOTE(review): an already-running server presumably keeps serving the `app`
# (and the model it references) from the script run that started it; confirm
# this is acceptable when the user switches algorithms in the UI.
# You can also check API response using the link below:
# http://localhost:8000/predict/?email=Your_email_text_here
|