Spaces:
Sleeping
Sleeping
File size: 1,775 Bytes
1b15407 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import streamlit as st
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Load the labelled message dataset.
# NOTE(review): this is a machine-specific absolute Windows path; the app
# will fail with FileNotFoundError wherever that file is absent. Consider a
# path relative to the script or a configurable location.
df = pd.read_csv(r"C:\Users\rajus\Downloads\spam.csv")

st.title("Identifying Spam and Ham Emails")

# Message text is the model input; the category column is the target.
x = df["Message"]
y = df["Category"]

# Bag-of-words token counts with English stop words removed.
bow = CountVectorizer(stop_words="english")

# Keep the document-term matrix in the sparse form that fit_transform
# returns. Expanding it into a dense DataFrame allocates one cell per
# (document, vocabulary term) pair, and it makes the estimator record
# feature names, so later predictions on a plain array trigger a
# "X does not have valid feature names" warning. Every estimator offered
# below accepts sparse input directly.
final_data = bow.fit_transform(x)

# Fixed random_state keeps the 80/20 split reproducible across reruns.
x_train, x_test, y_train, y_test = train_test_split(
    final_data, y, test_size=0.2, random_state=20
)

# Display name -> unfitted estimator, all with default hyper-parameters.
models = {
    "Naive Bayes": MultinomialNB(),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "SVM": SVC(),
}

model_choice = st.selectbox("Choose a Classification Algorithm", list(models.keys()))
obj = models[model_choice]

# NOTE(review): Streamlit re-executes this script on every widget
# interaction, so the selected model is refit on each rerun. If that becomes
# too slow, caching the fitted model is the usual remedy.
obj.fit(x_train, y_train)
y_pred = obj.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)

# Accuracy on the held-out 20% is only displayed on request.
if st.button("Show Accuracy"):
    st.write(f"Accuracy of {model_choice}: {accuracy:.4f}")

email_input = st.text_input("Enter an Email for Prediction")
def predict_email(email):
    """Classify one email text and write the predicted label to the page.

    Uses the module-level fitted vectorizer ``bow`` and the currently
    selected, fitted classifier ``obj``.

    Args:
        email: The raw text to classify.

    Returns:
        None. The result is rendered with ``st.write``.
    """
    # The vectorizer expects an iterable of documents, so wrap the single
    # text in a one-element list.
    documents = [email]
    features = bow.transform(documents).toarray()
    predicted_labels = obj.predict(features)
    # Exactly one document went in, so the first label is the answer.
    prediction = predicted_labels[0]
    st.write(f"Prediction: {prediction}")
# Classify the entered text when the button is clicked. A blank or
# whitespace-only entry has no tokens to classify, so it gets the red
# prompt rather than a meaningless prediction.
if st.button("Predict Email"):
    if email_input and email_input.strip():
        predict_email(email_input)
    else:
        st.write(":red[Please enter an email to classify]")