File size: 2,716 Bytes
759a794 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load dataset
# Streamlit re-executes this script top to bottom on every widget interaction,
# so the CSV is re-read from the working directory on each rerun.
df = pd.read_csv("Social_Network_Ads.csv")
df = df.drop(columns=["User ID"])  # Remove User ID

# App Title
# NOTE(review): the leading characters of the title look like a mis-decoded
# emoji; confirm the intended glyph against the original file.
st.title("๐ผ Social Network Ads - Customer Purchase Prediction")
st.write("### Predict if a user will purchase a product based on Age & Salary.")

# Dataset Preview
st.write("#### Dataset Preview:")
st.dataframe(df.head())

# Data Distribution
# Two side-by-side histograms (with KDE overlay): Age on the left,
# EstimatedSalary on the right.
st.write("#### Data Distribution")
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
sns.histplot(df["Age"], bins=20, kde=True, ax=axes[0], color="blue")
axes[0].set_title("Age Distribution")
sns.histplot(df["EstimatedSalary"], bins=20, kde=True, ax=axes[1], color="green")
axes[1].set_title("Salary Distribution")
st.pyplot(fig)
# Figures created through pyplot stay registered until closed; without this
# every rerun would leave another open figure behind.
plt.close(fig)
# Preprocessing
# Features are Age and EstimatedSalary; the target is the Purchased column.
X = df[["Age", "EstimatedSalary"]]
y = df["Purchased"]
# 80/20 split with a fixed seed so the reported metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# The scaler is fitted on the training split only and then reused for the
# test split, so test statistics do not leak into the scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Model Performance
# NOTE(review): the leading characters of the heading look like a mis-decoded
# emoji; confirm the intended glyph against the original file.
st.write("### ๐ Model Performance")
st.write(f"**Model Accuracy:** {accuracy:.2f}")
st.write("#### Classification Report:")
st.text(classification_report(y_test, y_pred))
st.write("#### Confusion Matrix:")
fig, ax = plt.subplots()
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes".
# NOTE(review): the tick labels assume the sorted class labels are
# (not purchased, purchased), e.g. 0/1; confirm against the dataset.
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=["Not Purchased", "Purchased"],
    yticklabels=["Not Purchased", "Purchased"],
    ax=ax,
)
# confusion_matrix puts true labels on rows and predicted labels on columns.
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
# Release the figure; otherwise each Streamlit rerun leaves another one open.
plt.close(fig)
# Prediction
# NOTE(review): the leading characters of the heading look like a mis-decoded
# emoji; confirm the intended glyph against the original file.
st.write("### ๐ค Try the Model")
st.write("Enter details to check if a customer will purchase.")

# Slider bounds come from the observed data range; the default is clamped into
# that range so a dataset with different bounds cannot make the slider raise.
age_min, age_max = int(X["Age"].min()), int(X["Age"].max())
salary_min, salary_max = int(X["EstimatedSalary"].min()), int(X["EstimatedSalary"].max())
age = st.slider(
    "Select Age",
    min_value=age_min,
    max_value=age_max,
    value=min(max(30, age_min), age_max),
)
salary = st.slider(
    "Select Estimated Salary",
    min_value=salary_min,
    max_value=salary_max,
    value=min(max(50000, salary_min), salary_max),
)

if st.button("Predict Purchase"):
    # The scaler was fitted on a DataFrame, so feed it a one-row DataFrame with
    # the same column names instead of a bare list.
    input_frame = pd.DataFrame([[age, salary]], columns=X.columns)
    input_data = scaler.transform(input_frame)
    prediction = model.predict(input_data)[0]
    prediction_proba = model.predict_proba(input_data)[0]

    st.subheader("Prediction Result")
    result_text = "Yes! The user is likely to purchase." if prediction == 1 else "No, the user is not likely to purchase."
    # Show the verdict itself, not just the confidence number.
    st.write(result_text)
    # predict_proba columns follow model.classes_, so look the predicted label
    # up there rather than using the label as a positional index.
    class_index = list(model.classes_).index(prediction)
    st.write(f"Confidence: {prediction_proba[class_index]:.2f}")
|