|
import streamlit as st |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from sklearn.preprocessing import StandardScaler |
|
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster |
|
import numpy as np |
|
|
|
|
|
# Page header: the app title followed by a one-line description.
# The title's leading emoji had been corrupted by a UTF-8 -> single-byte
# mis-decoding ("π§" is what the bytes of the brain emoji turn into); it is
# restored here.
st.title("🧠 Customer Cluster Explorer")
# The subtitle is plain Markdown with no raw HTML, so the unsafe_allow_html
# flag is not needed and is left at its safe default.
st.markdown(
    "#### Discover Groups of Customers Based on Their Income and Spending Habits using a Visual Tree"
)
|
|
|
|
|
# --- Data preparation and clustering ---------------------------------------
# Read the customer table; the path is relative to the working directory.
df = pd.read_csv("Mall_Customers.csv")

# The two columns the grouping is computed from.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[feature_columns]

# Standardise the features. The fitted scaler is kept at module level because
# the "find my group" tab reuses it to place a slider-selected customer in the
# same scaled space.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Build the hierarchical merge tree with Ward linkage, then cut it into at
# most five flat clusters and store each row's label next to its data.
linked = linkage(X_scaled, method='ward')
cluster_labels = fcluster(linked, t=5, criterion='maxclust')
df['Cluster'] = cluster_labels
|
|
|
|
|
# Create the three top-level tabs: a data preview, the dendrogram, and the
# interactive group lookup.
# The label emoji had been corrupted by a UTF-8 mis-decoding. The tree emoji
# is recoverable from the surviving bytes; for the other two only the first
# byte survived.
# NOTE(review): the clipboard and magnifying-glass glyphs are best guesses —
# confirm against the original design.
tab1, tab2, tab3 = st.tabs([
    "📋 Customer List",
    "🌳 Customer Group Tree",
    "🔍 Find My Customer Group",
])
|
|
|
# Tab 1: show the first ten customers together with their assigned cluster.
with tab1:
    # NOTE(review): the header's original emoji was lost to a mis-decoding
    # (only a stray "π" remained); the chart glyph is a best guess — confirm.
    st.header("📊 Customer Data")
    st.write("Here's a quick look at some customers and their income and spending habits.")

    preview_columns = [
        'CustomerID',
        'Gender',
        'Age',
        'Annual Income (k$)',
        'Spending Score (1-100)',
        'Cluster',
    ]
    st.dataframe(df[preview_columns].head(10))
|
|
|
# Tab 2: draw the dendrogram of the hierarchical clustering.
with tab2:
    # The tree emoji is restored from its mis-decoded form ("π³").
    st.header("🌳 How Customers Are Grouped")
    st.write("This diagram shows how customers are grouped based on how similar they are in terms of spending and income.")

    fig, ax = plt.subplots(figsize=(10, 4))
    # Condense the tree to its last 12 merged clusters so the leaves stay
    # readable.
    dendrogram(
        linked,
        truncate_mode='lastp',
        p=12,
        leaf_rotation=45.,
        leaf_font_size=12.,
        ax=ax,
    )
    st.pyplot(fig)
    # Streamlit re-executes the whole script on every interaction. Figures
    # made with plt.subplots stay registered in pyplot until closed, so
    # without this each rerun would leak one more figure.
    plt.close(fig)
|
|
|
# Tab 3: let the user pick an income/score pair and report the cluster of the
# most similar existing customer.
with tab3:
    # NOTE(review): the header's original emoji was lost to a mis-decoding
    # (only a stray "π" remained); the magnifying glass is a best guess.
    st.header("🔍 Which Group Does a Customer Belong To?")
    st.write("Use the sliders below to try different values and see which customer group they might belong to.")

    income = st.slider("Customer's Annual Income (k$)", min_value=15, max_value=150, value=40)
    # "1–100" restores the en dash that had been mis-decoded as "β".
    score = st.slider("Customer's Spending Score (1–100)", min_value=1, max_value=100, value=50)

    # The scaler was fitted on a DataFrame, so give it a one-row DataFrame
    # with the same column names. Passing a bare list makes scikit-learn warn
    # about missing feature names on every rerun.
    query = pd.DataFrame([[income, score]], columns=X.columns)
    new_point = scaler.transform(query)

    # Nearest-neighbour lookup in the scaled feature space: the new customer
    # takes the label of the closest existing customer (Euclidean distance).
    distances = np.linalg.norm(X_scaled - new_point, axis=1)
    closest_idx = distances.argmin()
    # Read the label straight from the column rather than materialising a
    # whole mixed-dtype row first.
    predicted_cluster = df['Cluster'].iloc[closest_idx]

    st.success(f"This customer is likely in **Group {int(predicted_cluster)}**, based on similar spending and income behavior.")
|
|