File size: 4,279 Bytes
95c86eb bd9e528 95c86eb bd9e528 95c86eb bd9e528 95c86eb 5f0c2ca 95c86eb 5f0c2ca 95c86eb bd9e528 5f0c2ca bd9e528 5f0c2ca bd9e528 95c86eb bd9e528 95c86eb bd9e528 95c86eb bd9e528 3d2ccd9 bd9e528 95c86eb bd9e528 95c86eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.cluster import kmeans_plusplus
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
plt.switch_backend("agg")
def initial_points(X, y_true, n_components, n_clust):
    """Plot the k-means++ seed centers on top of the sample data.

    Parameters
    ----------
    X : array
        Sample coordinates; columns 0 and 1 are used as the plot axes.
    y_true : array
        Blob label of each sample. Samples are drawn one blob at a time so
        that every blob gets its own scatter series.
    n_components : int
        Number of blob labels to draw (labels ``0 .. n_components - 1``).
    n_clust : int
        Number of seed centers requested from ``kmeans_plusplus``.

    Returns
    -------
    The matplotlib figure created by ``plt.subplots()``. It stays registered
    with pyplot until it is closed.
    """
    # Calculate seeds from k-means++; the fixed random_state makes the same
    # slider settings always produce the same seeds.
    centers_init, _ = kmeans_plusplus(X, n_clusters=n_clust, random_state=0)

    # Plot init seeds alongside the sample data. All drawing goes through the
    # explicit Figure/Axes objects rather than pyplot's implicit "current
    # figure", so the result does not depend on global pyplot state that other
    # requests of the running app may be changing at the same time.
    fig, ax = plt.subplots()
    fig.subplots_adjust(top=0.8)  # layout tweak; only needs to happen once
    for k in range(n_components):
        in_blob = y_true == k
        ax.scatter(X[in_blob, 0], X[in_blob, 1], marker=".", s=10)
    ax.scatter(centers_init[:, 0], centers_init[:, 1], c="b", s=50)
    ax.set_title("K-Means++ Initialization")
    ax.set_xticks([])
    ax.set_yticks([])
    return fig
def one_step(X, n_clust):
    """Plot the clustering obtained from KMeans limited to a single iteration.

    Parameters
    ----------
    X : array
        Sample coordinates; columns 0 and 1 are used as the plot axes.
    n_clust : int
        Number of clusters to fit.

    Returns
    -------
    The matplotlib figure created by ``plt.subplots()``. It stays registered
    with pyplot until it is closed.
    """
    # max_iter=1 / n_init=1: one initialization followed by one iteration;
    # random_state=0 keeps the result reproducible for given inputs.
    kmeans = KMeans(n_clusters=n_clust, max_iter=1, n_init=1, random_state=0).fit(X)
    y_hat = kmeans.predict(X)
    centers = kmeans.cluster_centers_

    # Draw on the explicit Figure/Axes instead of pyplot's implicit current
    # figure. The figure variable is deliberately not called ``one_step`` so it
    # does not shadow this function's own name.
    fig, ax = plt.subplots()
    fig.subplots_adjust(top=0.8)
    ax.scatter(X[:, 0], X[:, 1], marker=".", s=10, c=y_hat)  # colour = assigned cluster
    ax.scatter(centers[:, 0], centers[:, 1], c="b", s=50)
    ax.set_title("K-Means After One Step")
    ax.set_xticks([])
    ax.set_yticks([])
    return fig
def k_means(n_samples, n_components, clst_std, n_clust):
    """Generate blob data and build both demo plots.

    Parameters
    ----------
    n_samples : int
        Number of samples passed to ``make_blobs``.
    n_components : int
        Number of blobs (``centers``) to generate.
    clst_std : float
        Standard deviation of each blob (``cluster_std``).
    n_clust : int
        Number of clusters used for the k-means++ seeding and the KMeans fit.

    Returns
    -------
    tuple
        ``(plus_plot, step_plot)``: the k-means++ initialization figure and
        the figure showing KMeans after one iteration.
    """
    # Generate sample data (fixed seed so equal settings give equal plots)
    X, y_true = make_blobs(
        n_samples=n_samples, centers=n_components, cluster_std=clst_std, random_state=0
    )
    X = X[:, ::-1]  # reverse the column order, i.e. swap the plotted axes

    plus_plot = initial_points(X, y_true, n_components, n_clust)
    step_plot = one_step(X, n_clust)

    # Figures made with plt.subplots() stay registered in pyplot until they
    # are closed, so every slider event would otherwise leak two figures.
    # (The previous plt.clf() only wiped whichever figure happened to be
    # current and released nothing.) Closing merely unregisters the figures;
    # the Figure objects themselves are still returned for rendering.
    plt.close(plus_plot)
    plt.close(step_plot)
    return plus_plot, step_plot
title = "An example of K-Means++ Initialization"

# Build the UI: four sliders feeding k_means, two plots showing its results.
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    gr.Markdown("""
This example shows the output of the K-Means++ function.
K-Means++ is the default initialization function for the K-Means algorithm in scikit learn. K-Means++ serves to find smarter centroids or mean points. This prevents the common drawback of K-Means, where poor initialization points lead to poor results. These points will serve as initialization points for the iterative clustering.
In this example, we use blobs to demonstrate the algorithm. The blobs are groups of points where the smaller the standard deviation, the tighter they are packed. We can initialize number of blobs and number of clusters separately to demonstrate how the algorithms perform when the optimal number of clusters for the number of blobs was not chosen.
This is based on the example [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_plusplus.html#sphx-glr-auto-examples-cluster-plot-kmeans-plusplus-py).
""")
    with gr.Row():
        with gr.Column():
            n_samples = gr.Slider(100, 4000, 1000, step=1,
                                  label="Number of Samples")
            n_clusters = gr.Slider(1, 10, 4, step=1,
                                   label="Number of Clusters to Initialize")
        with gr.Column():
            n_components = gr.Slider(1, 10, 4, step=1, label="Number of blobs")
            clst_std = gr.Slider(.1, 1, .6, label="Blob Standard Deviation")
    with gr.Row():
        graph_points = gr.Plot(label="K-Means++ Initial Points")
        init_plus_one = gr.Plot(label="K-Means after one Step")

    # Every slider triggers the same recomputation with the same inputs and
    # outputs, so the wiring is declared once and applied to each control.
    # The input order must match the parameter order of k_means.
    controls = [n_samples, n_components, clst_std, n_clusters]
    plots = [graph_points, init_plus_one]
    for control in (n_samples, n_clusters, n_components, clst_std):
        control.change(fn=k_means, inputs=controls, outputs=plots)

    # Render the plots for the default slider values when the page opens,
    # instead of leaving both plots blank until a slider is moved.
    demo.load(fn=k_means, inputs=controls, outputs=plots)

if __name__ == '__main__':
    demo.launch()
|