LenixC committed on
Commit
bd9e528
·
1 Parent(s): 95c86eb

Added K-Means after one step, slight reorganization, clarified demonstration.

Browse files
Files changed (1) hide show
  1. app.py +52 -19
app.py CHANGED
@@ -5,24 +5,17 @@ import gradio as gr
5
  import matplotlib.pyplot as plt
6
 
7
  from sklearn.cluster import kmeans_plusplus
 
8
  from sklearn.datasets import make_blobs
9
 
10
  plt.switch_backend("agg")
11
 
12
- def initial_points(n_samples, n_components, clst_std, n_clust):
13
- plt.clf()
14
- # Generate sample data
15
-
16
- X, y_true = make_blobs(
17
- n_samples=n_samples, centers=n_components, cluster_std=clst_std, random_state=0
18
- )
19
- X = X[:, ::-1]
20
-
21
  # Calculate seeds from k-means++
22
  centers_init, indices = kmeans_plusplus(X, n_clusters=n_clust, random_state=0)
23
-
24
  # Plot init seeds alongside the sample data
25
- plt.figure(1)
26
 
27
  for k in range(n_components):
28
  cluster_data = y_true == k
@@ -32,7 +25,36 @@ def initial_points(n_samples, n_components, clst_std, n_clust):
32
  plt.title("K-Means++ Initialization")
33
  plt.xticks([])
34
  plt.yticks([])
35
- return plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  title = "An example of K-Means++ Initialization"
38
  with gr.Blocks() as demo:
@@ -40,21 +62,32 @@ with gr.Blocks() as demo:
40
  gr.Markdown("""
41
  This example shows the output of the K-Means++ function.
42
 
 
 
 
 
43
  This is based on the example [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_plusplus.html#sphx-glr-auto-examples-cluster-plot-kmeans-plusplus-py).
44
  """)
45
  with gr.Row():
46
  with gr.Column():
47
- n_samples = gr.Slider(100, 4000, 1000, label="Number of Samples")
 
 
 
 
48
  n_components = gr.Slider(1, 10, 4, step=1, label="Number of blobs")
49
  clst_std = gr.Slider(.1, 1, .6, label="Blob Standard Deviation")
50
- n_clusters = gr.Slider(1, 10, 4, step=1, label="Number of Clusters to Initialize")
51
- btn = gr.Button(label="Run")
52
- with gr.Column():
53
- graph_points = gr.Plot(label="K-Means++ Initial Points")
 
 
 
54
  btn.click(
55
- fn=initial_points,
56
  inputs=[n_samples, n_components, clst_std, n_clusters],
57
- outputs=[graph_points]
58
  )
59
 
60
  if __name__ == '__main__':
 
5
  import matplotlib.pyplot as plt
6
 
7
  from sklearn.cluster import kmeans_plusplus
8
+ from sklearn.cluster import KMeans
9
  from sklearn.datasets import make_blobs
10
 
11
  plt.switch_backend("agg")
12
 
13
+ def initial_points(X, y_true, n_components, n_clust):
 
 
 
 
 
 
 
 
14
  # Calculate seeds from k-means++
15
  centers_init, indices = kmeans_plusplus(X, n_clusters=n_clust, random_state=0)
16
+
17
  # Plot init seeds alongside the sample data
18
+ init_points_plot = plt.figure()
19
 
20
  for k in range(n_components):
21
  cluster_data = y_true == k
 
25
  plt.title("K-Means++ Initialization")
26
  plt.xticks([])
27
  plt.yticks([])
28
+ return init_points_plot
29
+
30
+ def one_step(X, n_clust):
31
+ kmeans = KMeans(n_clusters=n_clust, max_iter=1, n_init=1, random_state=0).fit(X)
32
+ y_hat = kmeans.predict(X)
33
+
34
+ one_step = plt.figure()
35
+ plt.scatter(X[:, 0], X[:, 1], marker=".", s=10, c=y_hat)
36
+ centers = kmeans.cluster_centers_
37
+ plt.scatter(centers[:, 0], centers[:, 1], c="b", s=50)
38
+
39
+ plt.title("K-Means After One Step")
40
+ plt.xticks([])
41
+ plt.yticks([])
42
+
43
+ return one_step
44
+
45
+ def k_means(n_samples, n_components, clst_std, n_clust):
46
+ plt.clf()
47
+ # Generate sample data
48
+
49
+ X, y_true = make_blobs(
50
+ n_samples=n_samples, centers=n_components, cluster_std=clst_std, random_state=0
51
+ )
52
+ X = X[:, ::-1]
53
+
54
+ plus_plot = initial_points(X, y_true, n_components, n_clust)
55
+ step_plot = one_step(X, n_clust)
56
+
57
+ return plus_plot, step_plot
58
 
59
  title = "An example of K-Means++ Initialization"
60
  with gr.Blocks() as demo:
 
62
  gr.Markdown("""
63
  This example shows the output of the K-Means++ function.
64
 
65
+ K-Means++ is the default initialization function for the K-Means algorithm in scikit-learn. K-Means++ serves to find smarter centroids or mean points. This prevents the common drawback of K-Means, where poor initialization points lead to poor results. These points will serve as initialization points for the iterative clustering.
66
+
67
+ In this example, we use blobs to demonstrate the algorithm. The blobs are groups of points where the smaller the standard deviation, the tighter they are packed. We can set the number of blobs and the number of clusters separately to demonstrate how the algorithms perform when the optimal number of clusters for the number of blobs was not chosen.
68
+
69
  This is based on the example [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_plusplus.html#sphx-glr-auto-examples-cluster-plot-kmeans-plusplus-py).
70
  """)
71
  with gr.Row():
72
  with gr.Column():
73
+ n_samples = gr.Slider(100, 4000, 1000, step=1,
74
+ label="Number of Samples")
75
+ n_clusters = gr.Slider(1, 10, 4, step=1,
76
+ label="Number of Clusters to Initialize")
77
+ with gr.Column():
78
  n_components = gr.Slider(1, 10, 4, step=1, label="Number of blobs")
79
  clst_std = gr.Slider(.1, 1, .6, label="Blob Standard Deviation")
80
+
81
+ btn = gr.Button(label="Run")
82
+
83
+ with gr.Row():
84
+ graph_points = gr.Plot(label="K-Means++ Initial Points")
85
+ init_plus_one = gr.Plot(label="K-Means after one Step")
86
+
87
  btn.click(
88
+ fn=k_means,
89
  inputs=[n_samples, n_components, clst_std, n_clusters],
90
+ outputs=[graph_points, init_plus_one]
91
  )
92
 
93
  if __name__ == '__main__':