# Michael Rey
# made changes again
# 368369d
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Read the ads CSV and discard the "User ID" column before anything is shown.
df = pd.read_csv("Social_Network_Ads.csv").drop(columns=["User ID"])

# Page header: the app title followed by a one-line description.
st.title("Social Network Ads - Customer Purchase Prediction")
st.write("#### Predict if a user will purchase a product based on Age & Salary using Logistic Regression.")

# Preview: render the first rows returned by DataFrame.head().
st.write("#### Dataset Preview:")
st.dataframe(df.head())
# Data Distribution
# Two side-by-side histograms (20 bins, KDE overlay): Age on the left,
# EstimatedSalary on the right.
st.write("#### Data Distribution")
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
histogram_specs = (
    ("Age", "Age Distribution", "blue"),
    ("EstimatedSalary", "Salary Distribution", "green"),
)
for panel, (column, panel_title, panel_color) in zip(ax, histogram_specs):
    sns.histplot(df[column], bins=20, kde=True, ax=panel, color=panel_color)
    panel.set_title(panel_title)
st.pyplot(fig)
# Streamlit re-executes the whole script on every widget interaction; close
# the figure once rendered so pyplot does not keep one more open figure per
# rerun.
plt.close(fig)
# Features and target: Age and EstimatedSalary are the inputs, Purchased is
# the label to predict.
feature_columns = ["Age", "EstimatedSalary"]
X = df[feature_columns]
y = df["Purchased"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the logistic-regression classifier and evaluate it on the held-out rows.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
# Model Performance
# Reports accuracy, the per-class classification report, and a confusion
# matrix heatmap for the held-out test split.
st.write("### 📊 Model Performance")
st.write(f"**Model Accuracy:** {accuracy:.2f}")
st.write("#### Classification Report:")
st.text(classification_report(y_test, y_pred))
st.write("#### Confusion Matrix:")
class_names = ["Not Purchased", "Purchased"]
fig, ax = plt.subplots()
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,  # draw on the axes created above instead of the implicit current axes
)
# Both axes carry the same tick labels, so name them: confusion_matrix puts
# true labels on rows and predicted labels on columns.
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
# Streamlit re-executes the whole script on every widget interaction; close
# the figure once rendered so open figures do not accumulate across reruns.
plt.close(fig)
# Prediction
# Lets the user pick an age and salary, then shows the model's prediction and
# the probability it assigns to the predicted class.
st.write("### 🤖 Try the Model")
st.write("Enter details to check if a customer will purchase.")

# Slider bounds come from the data; clamp the default values into those bounds
# so the sliders stay valid even if the dataset's range excludes 30 / 50000.
age_min, age_max = int(X["Age"].min()), int(X["Age"].max())
salary_min = int(X["EstimatedSalary"].min())
salary_max = int(X["EstimatedSalary"].max())
age = st.slider(
    "Select Age",
    min_value=age_min,
    max_value=age_max,
    value=min(max(30, age_min), age_max),
)
salary = st.slider(
    "Select Estimated Salary",
    min_value=salary_min,
    max_value=salary_max,
    value=min(max(50000, salary_min), salary_max),
)

if st.button("Predict Purchase"):
    # The scaler was fitted on a DataFrame, so pass the same column names to
    # avoid scikit-learn's "X does not have valid feature names" warning.
    input_frame = pd.DataFrame([[age, salary]], columns=X.columns)
    input_data = scaler.transform(input_frame)
    prediction = model.predict(input_data)[0]
    prediction_proba = model.predict_proba(input_data)[0]
    # predict_proba columns follow model.classes_; look the predicted label up
    # there instead of assuming the label equals its column position.
    class_index = list(model.classes_).index(prediction)

    st.subheader("Prediction Result")
    if prediction == 1:
        st.success("Yes! The user is likely to purchase.")
    else:
        st.warning("No, the user is not likely to purchase.")
    st.write(f"Confidence: {prediction_proba[class_index]:.2f}")