# File-viewer header captured together with the source (not program code):
# Michael Rey · modified code · 7e08452 · raw · history blame · 2.72 kB
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load dataset
# Read the ads CSV and discard the "User ID" column in one chained step.
df = pd.read_csv("Social_Network_Ads.csv").drop(columns=["User ID"])
# App Title
# The leading emoji is written as a real Unicode character; it had been stored
# as mis-decoded UTF-8 bytes and rendered as garbage in the page title.
st.title("💼 Social Network Ads - Customer Purchase Prediction")
st.write("### Predict if a user will purchase a product based on Age & Salary.")

# Dataset Preview
# Show only the first rows so the page stays compact.
st.write("#### Dataset Preview:")
st.dataframe(df.head())
# Data Distribution
# Side-by-side histograms (20 bins, KDE overlay) of the Age and
# EstimatedSalary columns.
st.write("#### Data Distribution")
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
sns.histplot(df["Age"], bins=20, kde=True, ax=ax[0], color="blue")
ax[0].set_title("Age Distribution")
sns.histplot(df["EstimatedSalary"], bins=20, kde=True, ax=ax[1], color="green")
ax[1].set_title("Salary Distribution")
st.pyplot(fig)
# Streamlit re-executes the whole script on every interaction, and pyplot
# keeps each figure it creates alive until it is closed explicitly. Release
# the figure once it has been rendered so figures do not pile up across reruns.
plt.close(fig)
# Preprocessing
# Target is the "Purchased" column; features are the two numeric columns.
y = df["Purchased"]
X = df[["Age", "EstimatedSalary"]]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply them
# to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train Logistic Regression Model
model = LogisticRegression().fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
# Model Performance
# Heading emoji restored from mis-decoded UTF-8 bytes.
st.write("### 📊 Model Performance")
st.write(f"**Model Accuracy:** {accuracy:.2f}")
st.write("#### Classification Report:")
# st.text keeps the report's fixed-width column alignment.
st.text(classification_report(y_test, y_pred))

st.write("#### Confusion Matrix:")
fig, ax = plt.subplots()
class_names = ["Not Purchased", "Purchased"]
# Draw on the axes created above instead of relying on pyplot's implicit
# "current axes".
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
# sklearn's confusion_matrix puts true classes on rows and predicted classes
# on columns; label the axes so the plot can be read without knowing that.
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
# The script is re-run on every interaction; close the figure after rendering
# so pyplot does not accumulate open figures.
plt.close(fig)
# Prediction
# Heading emoji restored from mis-decoded UTF-8 bytes.
st.write("### 🤖 Try the Model")
st.write("Enter details to check if a customer will purchase.")

# Slider bounds come from the data. The preferred defaults (30 / 50000) are
# clamped into those bounds because st.slider rejects a default value that
# lies outside [min_value, max_value].
age_min, age_max = int(X["Age"].min()), int(X["Age"].max())
salary_min = int(X["EstimatedSalary"].min())
salary_max = int(X["EstimatedSalary"].max())
age = st.slider(
    "Select Age",
    min_value=age_min,
    max_value=age_max,
    value=min(max(30, age_min), age_max),
)
salary = st.slider(
    "Select Estimated Salary",
    min_value=salary_min,
    max_value=salary_max,
    value=min(max(50000, salary_min), salary_max),
)

if st.button("Predict Purchase"):
    # The scaler was fitted on a DataFrame, so hand it a one-row frame with
    # the same column names; a bare list triggers scikit-learn's
    # "X does not have valid feature names" warning.
    input_frame = pd.DataFrame([[age, salary]], columns=X.columns)
    input_data = scaler.transform(input_frame)
    prediction = model.predict(input_data)[0]
    prediction_proba = model.predict_proba(input_data)[0]
    # predict_proba columns are ordered like model.classes_; look the column
    # up by label rather than assuming the label equals its position.
    class_index = list(model.classes_).index(prediction)

    st.subheader("Prediction Result")
    result_text = "Yes! The user is likely to purchase." if prediction == 1 else "No, the user is not likely to purchase."
    # The verdict was previously built but never shown to the user.
    st.write(result_text)
    st.write(f"Confidence: {prediction_proba[class_index]:.2f}")