Aqsa Kausar committed on
Commit
6431e51
·
unverified ·
1 Parent(s): ed4be89

added kmeans clustering

Browse files
Files changed (1) hide show
  1. embedding_gen.py +24 -0
embedding_gen.py CHANGED
@@ -65,6 +65,27 @@ def visualize_embeddings_3d(reduced_embeddings, skills, output_folder, date):
65
 
66
  fig.show()
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # Main execution
69
  base_folder = "./tags"
70
  output_folder = "./plots"
@@ -87,3 +108,6 @@ else:
87
  # Reduce dimensions to 3D and visualize
88
  reduced_embeddings_3d = reduce_dimensions(embeddings, n_components=3)
89
  visualize_embeddings_3d(reduced_embeddings_3d, skills, output_folder, specific_date)
 
 
 
 
65
 
66
  fig.show()
67
 
68
def perform_kmeans_and_visualize(reduced_embeddings, skills, n_clusters, output_folder, date):
    """Cluster 3D embeddings with KMeans and save/show an interactive scatter plot.

    Args:
        reduced_embeddings: 2-D array indexed as ``[:, 0]``, ``[:, 1]`` and
            ``[:, 2]`` for the x, y and z axes (so at least three columns).
        skills: Text labels drawn next to the points; presumably one per row
            of ``reduced_embeddings`` -- confirm against the caller.
        n_clusters: Number of KMeans clusters to fit.
        output_folder: Directory for the HTML plot; created if missing.
        date: Value interpolated into the plot title and the output file name.

    Raises:
        ValueError: If ``n_clusters`` exceeds the number of samples.
    """
    n_samples = len(reduced_embeddings)
    if n_clusters > n_samples:
        # KMeans would reject this as well; fail early with a clearer message.
        raise ValueError(
            f"n_clusters={n_clusters} exceeds the number of samples ({n_samples})"
        )

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(reduced_embeddings)

    # Cluster ids are categories, not magnitudes. Plotly Express maps numeric
    # colour values to a continuous colour scale, so convert them to strings
    # to get one discrete colour (and one legend entry) per cluster.
    cluster_names = labels.astype(str)

    fig = px.scatter_3d(
        x=reduced_embeddings[:, 0],
        y=reduced_embeddings[:, 1],
        z=reduced_embeddings[:, 2],
        color=cluster_names,
        text=skills,
        labels={"color": "Cluster"},
        title=f"KMeans Clustering with {n_clusters} Clusters ({date})",
    )

    # Save the clustered plot
    os.makedirs(output_folder, exist_ok=True)
    plot_path = os.path.join(output_folder, f"{date}_3D_clustering.html")
    fig.write_html(plot_path)
    print(f"3D clustered plot saved at {plot_path}")

    fig.show()
88
+
89
  # Main execution
90
  base_folder = "./tags"
91
  output_folder = "./plots"
 
108
  # Reduce dimensions to 3D and visualize
109
  reduced_embeddings_3d = reduce_dimensions(embeddings, n_components=3)
110
  visualize_embeddings_3d(reduced_embeddings_3d, skills, output_folder, specific_date)
111
+
112
+ # Perform KMeans clustering and visualize in 3D
113
+ perform_kmeans_and_visualize(reduced_embeddings_3d, skills, n_clusters, output_folder, specific_date)