# Michael Rey
# added app.py
# 1e4147d
# raw
# history blame contribute delete
# 2.19 kB
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import numpy as np
# --- Page heading -----------------------------------------------------------
st.title("🧍 Customer Cluster Explorer")
st.markdown(
    "#### Discover Groups of Customers Based on Their Income and Spending Habits using a Visual Tree",
    unsafe_allow_html=True,
)

# --- Data -------------------------------------------------------------------
# The CSV path is relative, so it is resolved against the working directory.
df = pd.read_csv("Mall_Customers.csv")

# Only these two columns are used as clustering features.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df.loc[:, feature_columns]

# Standardise both features; the fitted scaler is reused later to transform
# the values picked with the sliders.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# --- Hierarchical clustering ------------------------------------------------
# Ward linkage over the scaled features, then cut the tree into at most
# `n_groups` flat clusters and store each customer's label on the frame.
n_groups = 5
linked = linkage(X_scaled, 'ward')
cluster_labels = fcluster(linked, t=n_groups, criterion='maxclust')
df['Cluster'] = cluster_labels
# Create the three top-level tabs: customer table, dendrogram, group lookup.
# The emoji in the labels were stored as mis-decoded UTF-8 bytes and showed
# up as garbage in the UI; they are written here as real Unicode characters.
tab1, tab2, tab3 = st.tabs([
    "📋 Customer List",
    "🌳 Customer Group Tree",
    "🔍 Find My Customer Group",
])
with tab1:
    # Customer table tab: show the first ten customers together with the
    # cluster label assigned to each of them.
    st.header("📋 Customer Data")  # emoji restored from mis-decoded bytes
    st.write("Here's a quick look at some customers and their income and spending habits.")
    preview_columns = [
        'CustomerID',
        'Gender',
        'Age',
        'Annual Income (k$)',
        'Spending Score (1-100)',
        'Cluster',
    ]
    st.dataframe(df[preview_columns].head(10))
with tab2:
    # Dendrogram tab: draw the linkage tree computed from the scaled features.
    st.header("🌳 How Customers Are Grouped")  # emoji restored from mis-decoded bytes
    st.write("This diagram shows how customers are grouped based on how similar they are in terms of spending and income.")
    fig, ax = plt.subplots(figsize=(10, 4))
    # truncate_mode='lastp' with p=12 draws a condensed tree rather than one
    # leaf per customer.
    dendrogram(linked, truncate_mode='lastp', p=12, leaf_rotation=45., leaf_font_size=12., ax=ax)
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; without an
    # explicit close each rerun would leave another figure open in pyplot.
    plt.close(fig)
with tab3:
    # Lookup tab: assign the slider values to the cluster of the nearest
    # existing customer in the scaled feature space.
    st.header("🔍 Which Group Does a Customer Belong To?")  # emoji restored from mis-decoded bytes
    st.write("Use the sliders below to try different values and see which customer group they might belong to.")
    income = st.slider("Customer's Annual Income (k$)", 15, 150, 40)
    score = st.slider("Customer's Spending Score (1–100)", 1, 100, 50)

    # Compare with existing customers.
    # The scaler was fitted on a DataFrame, so the query is passed as a
    # DataFrame with the same column names; a bare list makes scikit-learn
    # warn about missing feature names on every rerun.
    query = pd.DataFrame([[income, score]], columns=X.columns)
    new_point = scaler.transform(query)

    # Euclidean distance from the query to every customer (row-wise norm).
    distances = np.linalg.norm(X_scaled - new_point, axis=1)
    closest_idx = distances.argmin()

    # Read the label straight from the 'Cluster' column instead of building
    # a mixed-dtype row first.
    predicted_cluster = df['Cluster'].iloc[closest_idx]
    st.success(f"This customer is likely in **Group {int(predicted_cluster)}**, based on similar spending and income behavior.")