|
import gradio as gr |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from sklearn.model_selection import train_test_split |
|
|
|
import matplotlib.cm as cm |
|
from sklearn.utils import shuffle |
|
from sklearn.utils import check_random_state |
|
from sklearn.cluster import MiniBatchKMeans |
|
from sklearn.cluster import KMeans |
|
|
|
# Visual theme for the app: Gradio's Monochrome theme with custom hues.
theme = gr.themes.Monochrome(
    primary_hue="indigo", secondary_hue="blue", neutral_hue="slate"
)
|
|
|
# Markdown blurb passed to ``gr.Markdown`` in the UI. It is a plain string
# (not an f-string) because the text contains no interpolated placeholders.
description = """
## Description

This demo can be used to evaluate the ability of k-means initialization strategies to make the algorithm's convergence robust.
"""
|
|
|
|
|
# Module-level NumPy random generator with a fixed seed of 0.
# NOTE(review): the functions visible in this file take their own seed
# argument and do not appear to read this object -- confirm before removing.
random_state = np.random.RandomState(0)

# Values of ``n_init`` that are swept (x-axis of the resulting plot).
n_init_range = np.array([1, 5, 10, 15, 20])

# Synthetic dataset layout: blob centers sit on a grid_size x grid_size grid,
# so the number of clusters requested from the estimators is grid_size squared.
grid_size = 3
n_clusters = grid_size * grid_size

# Number of points generated around each center, and the ``scale`` (standard
# deviation) handed to the normal noise generator.
n_samples_per_center = 100
scale = 0.1
|
|
|
def make_data(random_state, n_samples_per_center, grid_size, scale):
    """Generate shuffled 2-D blobs centred on a regular integer grid.

    Parameters
    ----------
    random_state : int, RandomState instance or None
        Seed or generator, normalised through ``check_random_state``.
    n_samples_per_center : int
        Number of points generated around each grid center.
    grid_size : int
        Side length of the grid; ``grid_size ** 2`` centers are created at
        the integer coordinates ``(i, j)`` with ``0 <= i, j < grid_size``.
    scale : float
        ``scale`` argument (standard deviation) of the normal noise.

    Returns
    -------
    The shuffled samples ``X`` and their integer center labels ``y``, as
    returned by ``sklearn.utils.shuffle(X, y, ...)``.
    """
    rng = check_random_state(random_state)

    # Lattice of centers, enumerated row-major: (0, 0), (0, 1), ..., (g-1, g-1).
    axis = range(grid_size)
    centers = np.array([[row, col] for row in axis for col in axis])
    n_clusters_true, n_features = centers.shape

    # A single noise cloud is drawn once and reused for every center.
    noise = rng.normal(scale=scale, size=(n_samples_per_center, n_features))

    # Broadcast centers against the noise cloud, then flatten so that all the
    # points of center 0 come first, then those of center 1, and so on.
    X = (centers[:, np.newaxis, :] + noise[np.newaxis, :, :]).reshape(
        n_clusters_true * n_samples_per_center, n_features
    )
    y = np.concatenate(
        [[label] * n_samples_per_center for label in range(n_clusters_true)]
    )
    return shuffle(X, y, random_state=rng)
|
|
|
def quant_evaluation(n_runs):
    """Plot mean final inertia against ``n_init`` for several k-means setups.

    For every (estimator, init strategy) pair, a model is fitted for each
    value in ``n_init_range`` on ``n_runs`` datasets. Each dataset is
    generated by ``make_data`` seeded with the run index, and the same index
    seeds the estimator. The mean and standard deviation of the resulting
    inertia over the runs are drawn as one error-bar curve per pair.

    Parameters
    ----------
    n_runs : int
        Number of independent runs to aggregate over.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the plot. A dedicated figure is returned instead
        of the global ``pyplot`` module, and it is removed from pyplot's
        registry before returning, so repeated calls neither accumulate open
        figures nor depend on pyplot's implicit "current figure".
    """
    # The UI slider may deliver its value as a float; range() needs an int.
    n_runs = int(n_runs)

    # (estimator class, init strategy, extra estimator kwargs, line format)
    cases = [
        (KMeans, "k-means++", {}, "^-"),
        (KMeans, "random", {}, "o-"),
        (MiniBatchKMeans, "k-means++", {"max_no_improvement": 3}, "x-"),
        (MiniBatchKMeans, "random", {"max_no_improvement": 3, "init_size": 500}, "d-"),
    ]

    fig, ax = plt.subplots()
    try:
        handles = []
        labels = []

        for factory, init, params, fmt in cases:
            label = f"{factory.__name__} with {init} init"
            print(f"Evaluation of {label}")

            # Rows index the n_init values, columns index the runs.
            inertia = np.empty((len(n_init_range), n_runs))

            for run_id in range(n_runs):
                X, _ = make_data(run_id, n_samples_per_center, grid_size, scale)
                for i, n_init in enumerate(n_init_range):
                    km = factory(
                        n_clusters=n_clusters,
                        init=init,
                        random_state=run_id,
                        n_init=n_init,
                        **params,
                    ).fit(X)
                    inertia[i, run_id] = km.inertia_

            container = ax.errorbar(
                n_init_range, inertia.mean(axis=1), inertia.std(axis=1), fmt=fmt
            )
            # Element 0 of the errorbar container is the data line itself.
            handles.append(container[0])
            labels.append(label)

        ax.set_xlabel("n_init")
        ax.set_ylabel("inertia")
        ax.legend(handles, labels)
        ax.set_title(f"Mean inertia for various k-means init across {n_runs} runs")
    finally:
        # Drop the figure from pyplot's global registry (also on error) so
        # figures do not pile up across calls; the Figure object itself stays
        # usable for rendering by the caller.
        plt.close(fig)

    return fig
|
|
|
# Build the Gradio interface: title, description, a run-count slider, a
# trigger button and the plot that displays the evaluation result.
with gr.Blocks(theme=theme) as demo:
    # The HTML lines are kept unindented inside the string so Markdown does
    # not treat them as an indented code block.
    gr.Markdown('''
<div>
<h1 style='text-align: center'>Empirical evaluation of the impact of k-means initialization π</h1>
</div>
''')
    gr.Markdown(description)
    n_runs = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of Evaluation Runs")
    run_button = gr.Button('Evaluate')
    plot_inertia = gr.Plot()
    # Clicking the button runs the evaluation with the slider value and sends
    # the returned plot to the Plot component.
    run_button.click(fn=quant_evaluation, inputs=[n_runs], outputs=plot_inertia)


# Start the server only when executed as a script, so importing this module
# (e.g. to reuse ``demo`` or the helper functions) does not launch the app.
if __name__ == "__main__":
    demo.launch()