File size: 2,191 Bytes
1e4147d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import numpy as np
# Page header: app title followed by a one-line description of the app.
# NOTE(review): the title's leading glyph was mis-encoded ("π§ "); it is restored
# here to the brain emoji those bytes appear to represent — confirm against
# the original file.
st.title("🧠 Customer Cluster Explorer")
# The subtitle is plain Markdown with no embedded HTML, so raw-HTML rendering
# is left at its safe default instead of being switched on.
st.markdown(
    "#### Discover Groups of Customers Based on Their Income and Spending Habits using a Visual Tree"
)
# Read the customer table (the path is relative to the working directory).
df = pd.read_csv("Mall_Customers.csv")

# Pick the two numeric features the grouping is based on and standardize them.
# The fitted scaler is kept so that new points can be put on the same scale.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[feature_columns]
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Ward-linkage hierarchical clustering over the standardized features, then
# cut the tree into at most five flat groups and store the labels on df.
linked = linkage(X_scaled, 'ward')
cluster_labels = fcluster(linked, 5, criterion='maxclust')
df['Cluster'] = cluster_labels
# Create the three top-level tabs of the app.
# NOTE(review): the label icons were mis-encoded in this file. The tree emoji
# is what the surviving bytes point to; the clipboard and magnifying glass are
# best guesses chosen to match each tab's purpose — confirm against the
# original file.
tab1, tab2, tab3 = st.tabs([
    "📋 Customer List",
    "🌳 Customer Group Tree",
    "🔍 Find My Customer Group",
])
# Tab 1: preview of the raw customer rows together with their assigned group.
with tab1:
    # NOTE(review): the header icon was mis-encoded; the clipboard emoji is a
    # best guess — confirm against the original file.
    st.header("📋 Customer Data")
    st.write("Here's a quick look at some customers and their income and spending habits.")
    preview_columns = [
        'CustomerID',
        'Gender',
        'Age',
        'Annual Income (k$)',
        'Spending Score (1-100)',
        'Cluster',
    ]
    # Only the first ten rows are shown; this is a preview, not the full table.
    st.dataframe(df[preview_columns].head(10))
# Tab 2: dendrogram of the hierarchical clustering.
with tab2:
    # NOTE(review): the header icon was mis-encoded; the surviving bytes point
    # to the tree emoji — confirm against the original file.
    st.header("🌳 How Customers Are Grouped")
    st.write("This diagram shows how customers are grouped based on how similar they are in terms of spending and income.")
    fig, ax = plt.subplots(figsize=(10, 4))
    # truncate_mode='lastp' with p=12 collapses the tree to its last 12 merged
    # groups so the plot stays readable.
    dendrogram(
        linked,
        truncate_mode='lastp',
        p=12,
        leaf_rotation=45.,
        leaf_font_size=12.,
        ax=ax,
    )
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed, and
    # Streamlit re-runs this script on each interaction; close the figure once
    # it has been rendered so figures do not pile up.
    plt.close(fig)
# Tab 3: assign a hypothetical customer to the group of the most similar
# existing customer (nearest neighbour in the standardized feature space).
with tab3:
    # NOTE(review): the header icon and the dash in the score label were
    # mis-encoded; the magnifying glass is a best guess and the dash is
    # restored as an en dash — confirm against the original file.
    st.header("🔍 Which Group Does a Customer Belong To?")
    st.write("Use the sliders below to try different values and see which customer group they might belong to.")
    income = st.slider("Customer's Annual Income (k$)", 15, 150, 40)
    score = st.slider("Customer's Spending Score (1–100)", 1, 100, 50)

    # Compare with existing customers.
    # The scaler was fitted on a DataFrame with named columns, so the new point
    # is passed with the same column names instead of as a bare list.
    new_customer = pd.DataFrame([[income, score]], columns=X.columns)
    new_point = scaler.transform(new_customer)

    # Euclidean distance from the new point to every existing customer.
    distances = np.linalg.norm(X_scaled - new_point, axis=1)
    closest_idx = distances.argmin()

    # Read the label straight from the Cluster column rather than building a
    # whole mixed-dtype row just to pick one value out of it.
    predicted_cluster = df['Cluster'].iloc[closest_idx]
    st.success(f"This customer is likely in **Group {int(predicted_cluster)}**, based on similar spending and income behavior.")
|