Spaces:

michaelryt
/

Mall_Customer_Segmentation_Using_Hierarchical_Clustering

Sleeping

App Files Files Community

Mall_Customer_Segmentation_Using_Hierarchical_Clustering / app.py

Michael Rey

added app.py

1e4147d 2 months ago

history blame contribute delete

2.19 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	from sklearn.preprocessing import StandardScaler
	from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
	import numpy as np

	# Page heading: the app title followed by a one-line description.
	st.title("🧍 Customer Cluster Explorer")
	# The subtitle is plain Markdown (a level-4 heading) with no embedded HTML,
	# so raw-HTML rendering is left at its safe default instead of being enabled.
	st.markdown("#### Discover Groups of Customers Based on Their Income and Spending Habits using a Visual Tree")

	# Load dataset (path is relative to the working directory the app runs in).
	df = pd.read_csv("Mall_Customers.csv")

	# Prepare data: cluster on income and spending score only, standardised so
	# both features contribute on the same scale to the distance computations.
	X = df[['Annual Income (k$)', 'Spending Score (1-100)']]
	scaler = StandardScaler()
	# Fit on the underlying array rather than the DataFrame: the scaler is later
	# asked to transform a plain [[income, score]] list, and a scaler fitted with
	# column names warns on every such call about missing feature names.
	X_scaled = scaler.fit_transform(X.to_numpy())

	# Build customer clusters using a visual tree method: Ward linkage over the
	# scaled features, then cut the tree into a fixed number of flat groups.
	N_CLUSTERS = 5
	linked = linkage(X_scaled, method='ward')
	df['Cluster'] = fcluster(linked, t=N_CLUSTERS, criterion='maxclust')

	# Lay the page out as three tabs: a data preview, the cluster tree, and an
	# interactive group lookup.
	tab_labels = [
	    "📋 Customer List",
	    "🌳 Customer Group Tree",
	    "🔍 Find My Customer Group",
	]
	tab1, tab2, tab3 = st.tabs(tab_labels)

	# Tab 1: preview of the first ten customers together with their assigned cluster.
	with tab1:
	    st.header("📋 Customer Data")
	    st.write("Here's a quick look at some customers and their income and spending habits.")
	    preview_columns = ['CustomerID', 'Gender', 'Age', 'Annual Income (k$)', 'Spending Score (1-100)', 'Cluster']
	    st.dataframe(df[preview_columns].head(10))

	# Tab 2: dendrogram of the hierarchical clustering computed above.
	with tab2:
	    st.header("🌳 How Customers Are Grouped")
	    st.write("This diagram shows how customers are grouped based on how similar they are in terms of spending and income.")
	    fig, ax = plt.subplots(figsize=(10, 4))
	    try:
	        # Truncate the tree (lastp, p=12) so the plot stays readable instead
	        # of drawing one leaf per customer.
	        dendrogram(linked, truncate_mode='lastp', p=12, leaf_rotation=45., leaf_font_size=12., ax=ax)
	        st.pyplot(fig)
	    finally:
	        # pyplot keeps every figure it creates registered until it is closed.
	        # Streamlit reruns this script on each interaction, so without this
	        # the figures would pile up in memory for the life of the process.
	        plt.close(fig)

	# Tab 3: let the user pick an income / spending score and report the cluster
	# of the most similar existing customer.
	with tab3:
	    st.header("🔍 Which Group Does a Customer Belong To?")
	    st.write("Use the sliders below to try different values and see which customer group they might belong to.")

	    income = st.slider("Customer's Annual Income (k$)", min_value=15, max_value=150, value=40)
	    score = st.slider("Customer's Spending Score (1–100)", min_value=1, max_value=100, value=50)

	    # Put the query on the same standardised scale as the training data, then
	    # take the cluster label of the nearest existing customer (Euclidean distance).
	    query_point = scaler.transform([[income, score]])
	    gaps = np.linalg.norm(X_scaled - query_point, axis=1)
	    nearest_row = int(np.argmin(gaps))
	    predicted_cluster = df['Cluster'].iloc[nearest_row]

	    st.success(f"This customer is likely in Group {int(predicted_cluster)}, based on similar spending and income behavior.")