File size: 2,716 Bytes
759a794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
# Read the ads CSV from the working directory and discard the "User ID"
# column in the same expression.
df = pd.read_csv("Social_Network_Ads.csv").drop(columns=["User ID"])

# App Title
# The leading emoji is spelled as a named Unicode escape so the source line
# stays pure ASCII and cannot be garbled by a wrong file-encoding round trip.
st.title("\N{BRIEFCASE} Social Network Ads - Customer Purchase Prediction")
st.write("### Predict if a user will purchase a product based on Age & Salary.")

# Dataset Preview
# Show only the first rows of the loaded frame.
st.write("#### Dataset Preview:")
st.dataframe(df.head())

# Data Distribution
# Two side-by-side histograms (20 bins, KDE overlay): Age on the left axes,
# EstimatedSalary on the right.
st.write("#### Data Distribution")
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
sns.histplot(df["Age"], bins=20, kde=True, ax=ax[0], color="blue")
ax[0].set_title("Age Distribution")
sns.histplot(df["EstimatedSalary"], bins=20, kde=True, ax=ax[1], color="green")
ax[1].set_title("Salary Distribution")
st.pyplot(fig)
# Figures created through pyplot stay registered until explicitly closed;
# Streamlit re-executes the script on each interaction, so without this the
# process accumulates one more open figure per rerun.
plt.close(fig)

# Preprocessing
# Features are Age and EstimatedSalary; the target is the Purchased column.
X = df[["Age", "EstimatedSalary"]]
y = df["Purchased"]
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
# Scaling statistics are learned from the training split only and then applied
# to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Train Logistic Regression Model
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)
# Metrics on the held-out split, consumed by the performance section below.
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Model Performance
# The heading emoji is a named Unicode escape so the source stays ASCII-only.
st.write("### \N{BAR CHART} Model Performance")
st.write(f"**Model Accuracy:** {accuracy:.2f}")
st.write("#### Classification Report:")
st.text(classification_report(y_test, y_pred))

st.write("#### Confusion Matrix:")
fig, ax = plt.subplots()
class_labels = ["Not Purchased", "Purchased"]
# Draw on the axes created above instead of relying on pyplot's implicit
# "current axes" global state.
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
# scikit-learn's confusion_matrix puts true labels on rows and predicted
# labels on columns.
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
st.pyplot(fig)
# Close the figure once rendered so repeated script reruns do not accumulate
# open pyplot figures.
plt.close(fig)

# Prediction
# Interactive section: the user picks an age and salary, and the trained model
# reports the predicted class together with its probability.
# The heading emoji is a named Unicode escape so the source stays ASCII-only.
st.write("### \N{ROBOT FACE} Try the Model")
st.write("Enter details to check if a customer will purchase.")

# Slider bounds come from the observed feature ranges.
age_min, age_max = int(X["Age"].min()), int(X["Age"].max())
salary_min, salary_max = int(X["EstimatedSalary"].min()), int(X["EstimatedSalary"].max())

# Clamp the preferred defaults into the data-derived range: a slider whose
# initial value lies outside [min_value, max_value] is rejected by Streamlit.
age = st.slider(
    "Select Age",
    min_value=age_min,
    max_value=age_max,
    value=min(max(30, age_min), age_max),
)
salary = st.slider(
    "Select Estimated Salary",
    min_value=salary_min,
    max_value=salary_max,
    value=min(max(50000, salary_min), salary_max),
)

if st.button("Predict Purchase"):
    # The scaler was fitted on a DataFrame, so pass the same column names to
    # avoid scikit-learn's "X does not have valid feature names" warning.
    input_frame = pd.DataFrame([[age, salary]], columns=X.columns)
    input_data = scaler.transform(input_frame)
    prediction = model.predict(input_data)[0]
    prediction_proba = model.predict_proba(input_data)[0]

    st.subheader("Prediction Result")
    if prediction == 1:
        result_text = "Yes! The user is likely to purchase."
    else:
        result_text = "No, the user is not likely to purchase."
    # Show the verdict itself; previously it was built but never rendered.
    st.write(result_text)
    # NOTE(review): indexing the probabilities by the predicted label assumes
    # the Purchased classes are encoded as 0/1 — confirm against the dataset.
    st.write(f"Confidence: {prediction_proba[prediction]:.2f}")