Spaces:

sklearn-docs
/

k-means-initialization-evaluation

Sleeping

App Files Files Community

k-means-initialization-evaluation / app.py

RamAnanth1

Update app.py

2df0d19 over 2 years ago

raw

history blame

4.78 kB

	import gradio as gr
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.model_selection import train_test_split

	import matplotlib.cm as cm
	from sklearn.utils import shuffle
	from sklearn.utils import check_random_state
	from sklearn.cluster import MiniBatchKMeans
	from sklearn.cluster import KMeans

	# Shared look for the whole demo: Gradio's Monochrome theme with custom hues.
	theme = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate")

	# Markdown blurb rendered under the page title. It is a plain string literal:
	# the text has no placeholders, so no f-string prefix is needed.
	description = """
	## Description
	This demo can be used to evaluate the ability of k-means initializations strategies to make the algorithm convergence robust
	"""

	# Dataset generation parameter: forwarded to make_data as the ``scale`` of the
	# normal noise added around each grid centre.
	scale = 0.1

	# Values of ``n_init`` swept by quant_evaluation. More random restarts cost
	# more CPU time but make convergence more robust.
	n_init_range = np.array((1, 5, 10, 15, 20))

	def make_data(random_state, n_samples_per_center, grid_size, scale):
	    """Build a labelled 2-D dataset of noisy blobs laid out on a square grid.

	    Centres sit on the integer points ``(i, j)`` with ``0 <= i, j < grid_size``.
	    A single block of normal noise is drawn once and added to every centre,
	    so all blobs share the same noise pattern.

	    Parameters
	    ----------
	    random_state : seed or generator accepted by ``check_random_state``
	        Drives both the noise draw and the final shuffle.
	    n_samples_per_center : int
	        Number of points generated around each centre.
	    grid_size : int
	        Side length of the grid; ``grid_size ** 2`` centres are created.
	    scale : float
	        Standard deviation passed to the normal noise draw.

	    Returns
	    -------
	    The result of ``shuffle(X, y)``: ``X`` holds
	    ``grid_size ** 2 * n_samples_per_center`` rows of 2 features and ``y``
	    holds the index of the centre each row was generated from.
	    """
	    rng = check_random_state(random_state)

	    # Enumerate grid points with the first coordinate varying slowest.
	    axis = range(grid_size)
	    centers = np.array([[row, col] for row in axis for col in axis])
	    n_clusters_true, n_features = centers.shape

	    # Drawn once: the same offsets are reused for every centre below.
	    noise = rng.normal(scale=scale, size=(n_samples_per_center, n_features))

	    X = np.concatenate([center + noise for center in centers])
	    y = np.concatenate(
	        [[label] * n_samples_per_center for label in range(n_clusters_true)]
	    )
	    return shuffle(X, y, random_state=rng)

	def quant_evaluation(n_runs, n_samples_per_center, grid_size):
	    """Plot mean inertia against ``n_init`` for several k-means variants.

	    For each (estimator, init strategy) pair, the estimator is fitted on
	    ``n_runs`` datasets produced by ``make_data`` (seeded with the run index)
	    for every value in the module-level ``n_init_range``. The mean and
	    standard deviation of the final inertia across runs are drawn as one
	    error-bar curve per pair.

	    Parameters
	    ----------
	    n_runs : int
	        Number of independently seeded datasets / fits per ``n_init`` value.
	    n_samples_per_center : int
	        Forwarded to ``make_data``.
	    grid_size : int
	        Forwarded to ``make_data``; the models use ``grid_size ** 2`` clusters.

	    Returns
	    -------
	    The ``matplotlib.pyplot`` module, whose current figure holds the plot.
	    """
	    n_clusters = grid_size**2

	    # Reuse one named figure and clear it on entry. A bare ``plt.figure()``
	    # registers a brand-new figure with pyplot on every call and nothing ever
	    # closes it, so a long-running app would accumulate figures in memory.
	    plt.figure("quant_evaluation", clear=True)
	    plots = []
	    legends = []

	    # (estimator class, init strategy, extra estimator kwargs, line format)
	    cases = [
	        (KMeans, "k-means++", {}, "^-"),
	        (KMeans, "random", {}, "o-"),
	        (MiniBatchKMeans, "k-means++", {"max_no_improvement": 3}, "x-"),
	        (MiniBatchKMeans, "random", {"max_no_improvement": 3, "init_size": 500}, "d-"),
	    ]

	    # ``fmt`` rather than ``format`` so the builtin is not shadowed.
	    for factory, init, params, fmt in cases:
	        label = f"{factory.__name__} with {init} init"
	        print(f"Evaluation of {label}")
	        # Rows follow n_init_range, columns follow the run index.
	        inertia = np.empty((len(n_init_range), n_runs))

	        for run_id in range(n_runs):
	            # Labels are not needed for the inertia measurement.
	            X, _ = make_data(run_id, n_samples_per_center, grid_size, scale)
	            for i, n_init in enumerate(n_init_range):
	                km = factory(
	                    n_clusters=n_clusters,
	                    init=init,
	                    random_state=run_id,
	                    n_init=n_init,
	                    **params,
	                ).fit(X)
	                inertia[i, run_id] = km.inertia_

	        p = plt.errorbar(
	            n_init_range, inertia.mean(axis=1), inertia.std(axis=1), fmt=fmt
	        )
	        # First element of the errorbar container is used as the legend handle.
	        plots.append(p[0])
	        legends.append(label)

	    plt.xlabel("n_init")
	    plt.ylabel("inertia")
	    plt.legend(plots, legends)
	    plt.title("Mean inertia for various k-means init across %d runs" % n_runs)
	    return plt

	def qual_evaluation(random_state, n_samples_per_center, grid_size):
	    """Plot the clusters found by one randomly initialised MiniBatchKMeans fit.

	    A dataset is generated with ``make_data`` and a ``MiniBatchKMeans`` model
	    with ``init="random"`` and ``n_init=1`` is fitted to it. Each cluster's
	    members are drawn as dots and its centre as a larger outlined marker in
	    the same colour.

	    Parameters
	    ----------
	    random_state : seed passed to both ``make_data`` and ``MiniBatchKMeans``
	    n_samples_per_center : int
	        Forwarded to ``make_data``.
	    grid_size : int
	        Forwarded to ``make_data``; the model uses ``grid_size ** 2`` clusters.

	    Returns
	    -------
	    The ``matplotlib.pyplot`` module, whose current figure holds the plot.
	    """
	    n_clusters = grid_size**2
	    # Ground-truth labels are not used for this visualisation.
	    X, _ = make_data(random_state, n_samples_per_center, grid_size, scale)
	    km = MiniBatchKMeans(
	        n_clusters=n_clusters, init="random", n_init=1, random_state=random_state
	    ).fit(X)

	    # Reuse one named figure and clear it on entry. A bare ``plt.figure()``
	    # registers a brand-new figure with pyplot on every call and nothing ever
	    # closes it, so a long-running app would accumulate figures in memory.
	    plt.figure("qual_evaluation", clear=True)
	    for k in range(n_clusters):
	        my_members = km.labels_ == k
	        # Spread cluster indices over the colormap; second argument is alpha.
	        color = cm.nipy_spectral(float(k) / n_clusters, 1)
	        plt.plot(X[my_members, 0], X[my_members, 1], ".", c=color)
	        cluster_center = km.cluster_centers_[k]
	        plt.plot(
	            cluster_center[0],
	            cluster_center[1],
	            "o",
	            markerfacecolor=color,
	            markeredgecolor="k",
	            markersize=6,
	        )
	    plt.title(
	        "Example cluster allocation with a single random init\nwith MiniBatchKMeans"
	    )
	    return plt

	# Page layout: title and description, one row of parameter sliders, one row
	# of action buttons, and one row holding the two output plots.
	with gr.Blocks(theme=theme) as demo:
	    gr.Markdown("""
	<h1 style='text-align: center'>Empirical evaluation of the impact of k-means initialization 📊</h1>
	""")
	    gr.Markdown(description)

	    # Parameters shared by the two evaluations.
	    with gr.Row():
	        n_runs = gr.Slider(
	            label="Number of Evaluation Runs", minimum=1, maximum=10, step=1, value=5
	        )
	        random_state = gr.Slider(
	            label="Random state", minimum=0, maximum=2000, step=5, value=0
	        )
	        n_samples_per_center = gr.Slider(
	            label="Number of Samples per Center", minimum=50, maximum=200, step=10, value=100
	        )
	        grid_size = gr.Slider(
	            label="Grid Size", minimum=1, maximum=8, step=1, value=3
	        )

	    with gr.Row():
	        run_button = gr.Button("Evaluate Inertia")
	        run_button_qual = gr.Button("Generate Cluster Allocations")

	    with gr.Row():
	        plot_inertia = gr.Plot()
	        plot_vis = gr.Plot()

	    # Each button feeds its own subset of sliders to its evaluation function
	    # and renders the result in its own plot.
	    run_button.click(
	        fn=quant_evaluation,
	        inputs=[n_runs, n_samples_per_center, grid_size],
	        outputs=plot_inertia,
	    )
	    run_button_qual.click(
	        fn=qual_evaluation,
	        inputs=[random_state, n_samples_per_center, grid_size],
	        outputs=plot_vis,
	    )

	demo.launch()