# Streamlit app: fits a logistic-regression classifier on the Social Network
# Ads CSV (features: Age, EstimatedSalary; target: Purchased), reports its
# test-set performance and lets the user try a prediction interactively.
#
# NOTE: header comments are used instead of a module docstring, and UI calls
# are written as plain call statements, so that Streamlit's "magic" (which
# st.write()s bare non-call expressions) never renders anything unintended.

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Columns fed to the model; reused when building the single-row prediction
# input so it carries the same feature names the scaler was fitted with.
FEATURE_COLUMNS = ["Age", "EstimatedSalary"]
CLASS_LABELS = ["Not Purchased", "Purchased"]

# Preferred initial slider positions; clamped to the data range before use.
DEFAULT_AGE = 30
DEFAULT_SALARY = 50000


def _clamp(value, low, high):
    """Return *value* limited to the closed interval [low, high]."""
    return max(low, min(value, high))


# Load dataset
df = pd.read_csv("Social_Network_Ads.csv")
df = df.drop(columns=["User ID"])  # Remove User ID

# App Title
st.title("Social Network Ads - Customer Purchase Prediction")
st.write("#### Predict if a user will purchase a product based on Age & Salary using Logistic Regression.")

# Dataset Preview
st.write("#### Dataset Preview:")
st.dataframe(df.head())

# Data Distribution
st.write("#### Data Distribution")
dist_fig, dist_axes = plt.subplots(1, 2, figsize=(12, 5))
sns.histplot(df["Age"], bins=20, kde=True, ax=dist_axes[0], color="blue")
dist_axes[0].set_title("Age Distribution")
sns.histplot(df["EstimatedSalary"], bins=20, kde=True, ax=dist_axes[1], color="green")
dist_axes[1].set_title("Salary Distribution")
st.pyplot(dist_fig)
# The script is re-executed on every widget interaction; close the figure so
# pyplot does not keep accumulating open figures across reruns.
plt.close(dist_fig)

# Preprocessing
X = df[FEATURE_COLUMNS]
y = df["Purchased"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Model Performance
st.write("### 📊 Model Performance")
st.write(f"**Model Accuracy:** {accuracy:.2f}")
st.write("#### Classification Report:")
st.text(classification_report(y_test, y_pred))

st.write("#### Confusion Matrix:")
cm_fig, cm_ax = plt.subplots()
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=CLASS_LABELS,
    yticklabels=CLASS_LABELS,
    ax=cm_ax,  # draw on this figure explicitly rather than on "current axes"
)
st.pyplot(cm_fig)
plt.close(cm_fig)

# Prediction
st.write("### 🤖 Try the Model")
st.write("Enter details to check if a customer will purchase.")

age_min, age_max = int(X["Age"].min()), int(X["Age"].max())
salary_min = int(X["EstimatedSalary"].min())
salary_max = int(X["EstimatedSalary"].max())

# The slider rejects a default outside [min_value, max_value], so clamp the
# preferred defaults to whatever range the loaded data actually spans.
age = st.slider(
    "Select Age",
    min_value=age_min,
    max_value=age_max,
    value=_clamp(DEFAULT_AGE, age_min, age_max),
)
salary = st.slider(
    "Select Estimated Salary",
    min_value=salary_min,
    max_value=salary_max,
    value=_clamp(DEFAULT_SALARY, salary_min, salary_max),
)

if st.button("Predict Purchase"):
    # Build a one-row frame with the training column names; the scaler was
    # fitted on a DataFrame, and a bare list would trigger scikit-learn's
    # "X does not have valid feature names" warning.
    input_frame = pd.DataFrame([[age, salary]], columns=FEATURE_COLUMNS)
    input_data = scaler.transform(input_frame)
    prediction = model.predict(input_data)[0]
    prediction_proba = model.predict_proba(input_data)[0]

    st.subheader("Prediction Result")
    # Plain if/else statement: a conditional *expression* on its own line is
    # not a call node, so Streamlit magic would also st.write() its value.
    if prediction == 1:
        st.success("Yes! The user is likely to purchase.")
    else:
        st.warning("No, the user is not likely to purchase.")

    # For this binary model the predicted class is the one with the larger
    # probability, so take the max instead of using the label as an index.
    st.write(f"Confidence: {prediction_proba.max():.2f}")