|
import gradio as gr |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.model_selection import train_test_split |
|
|
|
import matplotlib.cm as cm |
|
from sklearn.utils import shuffle |
|
from sklearn.utils import check_random_state |
|
from sklearn.cluster import MiniBatchKMeans |
|
from sklearn.cluster import KMeans |
|
|
|
# Colour theme handed to gr.Blocks below: Monochrome with custom hues.
theme = gr.themes.Monochrome(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate")
|
|
|
# Markdown blurb rendered under the page title.
description = """
## Description
This demo can be used to evaluate the ability of k-means initializations strategies to make the algorithm convergence robust
"""

# Values of ``n_init`` swept by quant_evaluation (x-axis of the inertia plot).
n_init_range = np.array([1, 5, 10, 15, 20])

# Standard deviation of the Gaussian noise that make_data adds around each center.
scale = 0.1
|
|
|
def make_data(random_state, n_samples_per_center, grid_size, scale):
    """Generate shuffled 2-D blobs centred on a ``grid_size`` x ``grid_size`` lattice.

    Parameters
    ----------
    random_state : int, RandomState instance or None
        Seed (or generator) forwarded to ``check_random_state``; the resulting
        generator drives both the noise and the final shuffle.
    n_samples_per_center : int
        Number of points generated around every lattice center.
    grid_size : int
        Side length of the lattice; there are ``grid_size**2`` centers.
    scale : float
        Standard deviation of the normal noise added to each center.

    Returns
    -------
    The result of ``shuffle(X, y)``: the samples ``X`` and the index ``y`` of
    the center each sample was generated from, shuffled with the same generator.
    """
    rng = check_random_state(random_state)

    # Centers are the integer lattice points (row, col), enumerated row-major.
    centers = np.array(
        [[row, col] for row in range(grid_size) for col in range(grid_size)]
    )
    n_clusters_true, n_features = centers.shape

    # A single noise cloud is drawn once and reused (translated) for every center.
    noise = rng.normal(scale=scale, size=(n_samples_per_center, n_features))

    # (centers, 1, features) + (1, samples, features), flattened center by
    # center -- the same layout as concatenating ``c + noise`` for each center.
    X = (centers[:, np.newaxis, :] + noise[np.newaxis, :, :]).reshape(-1, n_features)
    y = np.concatenate(
        [[label] * n_samples_per_center for label in range(n_clusters_true)]
    )
    return shuffle(X, y, random_state=rng)
|
|
|
def quant_evaluation(n_runs, n_samples_per_center, grid_size):
    """Plot mean final inertia against ``n_init`` for several k-means setups.

    Four estimator/initialisation pairs (KMeans and MiniBatchKMeans, each with
    "k-means++" and "random" init) are fitted for every value in the
    module-level ``n_init_range`` on ``n_runs`` datasets produced by
    ``make_data`` (dataset and estimator are both seeded with the run index).
    The mean inertia over runs is drawn with its standard deviation as error
    bars.

    Parameters
    ----------
    n_runs : int
        Number of seeded datasets/fits per configuration.
    n_samples_per_center : int
        Number of samples generated around each grid center.
    grid_size : int
        Side length of the grid of centers; ``grid_size**2`` clusters are fitted.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the error-bar plot.
    """
    # UI callers may deliver numeric values as floats (assumption -- depends on
    # the front end); range() and array shapes below require real ints.
    n_runs = int(n_runs)
    n_samples_per_center = int(n_samples_per_center)
    grid_size = int(grid_size)
    n_clusters = grid_size**2

    # (estimator class, init strategy, extra constructor kwargs, line format)
    cases = [
        (KMeans, "k-means++", {}, "^-"),
        (KMeans, "random", {}, "o-"),
        (MiniBatchKMeans, "k-means++", {"max_no_improvement": 3}, "x-"),
        (MiniBatchKMeans, "random", {"max_no_improvement": 3, "init_size": 500}, "d-"),
    ]

    # The dataset depends only on the run index, so build each one once
    # instead of regenerating it for every case.
    datasets = [
        make_data(run_id, n_samples_per_center, grid_size, scale)[0]
        for run_id in range(n_runs)
    ]

    # Draw on an explicit figure/axes instead of pyplot's implicit "current
    # figure", so the object returned is exactly the plot built in this call.
    fig, ax = plt.subplots()
    handles = []
    labels = []

    for factory, init, params, line_fmt in cases:
        label = f"{factory.__name__} with {init} init"
        print(f"Evaluation of {label}")
        inertia = np.empty((len(n_init_range), n_runs))

        for run_id, X in enumerate(datasets):
            for i, n_init in enumerate(n_init_range):
                km = factory(
                    n_clusters=n_clusters,
                    init=init,
                    random_state=run_id,
                    n_init=n_init,
                    **params,
                ).fit(X)
                inertia[i, run_id] = km.inertia_

        container = ax.errorbar(
            n_init_range, inertia.mean(axis=1), inertia.std(axis=1), fmt=line_fmt
        )
        # First element of the errorbar container is the data line.
        handles.append(container[0])
        labels.append(label)

    ax.set_xlabel("n_init")
    ax.set_ylabel("inertia")
    ax.legend(handles, labels)
    ax.set_title(f"Mean inertia for various k-means init across {n_runs} runs")

    # Unregister the figure from pyplot so repeated calls do not pile up open
    # figures; the Figure object itself remains usable for rendering.
    plt.close(fig)
    return fig
|
|
|
def qual_evaluation(random_state, n_samples_per_center, grid_size):
    """Plot the clusters found by one randomly initialised MiniBatchKMeans fit.

    A dataset is generated with ``make_data`` (using the module-level
    ``scale``), a ``MiniBatchKMeans`` with ``init="random"`` and ``n_init=1``
    is fitted on it, and every cluster's members and center are drawn in a
    distinct colour.

    Parameters
    ----------
    random_state : int, RandomState instance or None
        Seed used for both data generation and the estimator.
    n_samples_per_center : int
        Number of samples generated around each grid center.
    grid_size : int
        Side length of the grid of centers; ``grid_size**2`` clusters are fitted.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the scatter of samples and cluster centers.
    """
    # UI callers may deliver numeric values as floats (assumption -- depends on
    # the front end). Only floats are converted for the seed so None or a
    # RandomState instance still pass through untouched.
    if isinstance(random_state, float):
        random_state = int(random_state)
    n_samples_per_center = int(n_samples_per_center)
    grid_size = int(grid_size)
    n_clusters = grid_size**2

    X, _ = make_data(random_state, n_samples_per_center, grid_size, scale)
    km = MiniBatchKMeans(
        n_clusters=n_clusters, init="random", n_init=1, random_state=random_state
    ).fit(X)

    # Draw on an explicit figure/axes instead of pyplot's implicit "current
    # figure", so the object returned is exactly the plot built in this call.
    fig, ax = plt.subplots()
    for k in range(n_clusters):
        members = km.labels_ == k
        # Spread the cluster indices over the colormap; second argument is alpha.
        color = cm.nipy_spectral(float(k) / n_clusters, 1)
        ax.plot(X[members, 0], X[members, 1], ".", c=color)
        center = km.cluster_centers_[k]
        ax.plot(
            center[0],
            center[1],
            "o",
            markerfacecolor=color,
            markeredgecolor="k",
            markersize=6,
        )
    ax.set_title(
        "Example cluster allocation with a single random init\nwith MiniBatchKMeans"
    )

    # Unregister the figure from pyplot so repeated calls do not pile up open
    # figures; the Figure object itself remains usable for rendering.
    plt.close(fig)
    return fig
|
|
|
# Build the page: title, description, one row of sliders, one row of buttons
# and one row with the two output plots; then wire the buttons and launch.
with gr.Blocks(theme=theme) as demo:
    # NOTE(review): the trailing "π" in the title may be a mis-encoded emoji -- confirm.
    gr.Markdown(
        "\n<h1 style='text-align: center'>Empirical evaluation of the impact of k-means initialization π</h1>\n"
    )
    gr.Markdown(description)

    with gr.Row():
        n_runs = gr.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=5,
            label="Number of Evaluation Runs",
        )
        random_state = gr.Slider(
            minimum=0,
            maximum=2000,
            step=5,
            value=0,
            label="Random state",
        )
        n_samples_per_center = gr.Slider(
            minimum=50,
            maximum=200,
            step=10,
            value=100,
            label="Number of Samples per Center",
        )
        grid_size = gr.Slider(
            minimum=1,
            maximum=8,
            step=1,
            value=3,
            label="Grid Size",
        )

    with gr.Row():
        run_button = gr.Button("Evaluate Inertia")
        run_button_qual = gr.Button("Generate Cluster Allocations")

    with gr.Row():
        plot_inertia = gr.Plot()
        plot_vis = gr.Plot()

    # Left button -> quantitative inertia plot (seeded by run index, so the
    # random-state slider is not an input here).
    run_button.click(
        fn=quant_evaluation,
        inputs=[n_runs, n_samples_per_center, grid_size],
        outputs=plot_inertia,
    )
    # Right button -> qualitative single-fit cluster allocation plot.
    run_button_qual.click(
        fn=qual_evaluation,
        inputs=[random_state, n_samples_per_center, grid_size],
        outputs=plot_vis,
    )

demo.launch()