Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -274,7 +274,7 @@ def extract_problem_domains(df,
|
|
274 |
optimal_n_clusters = cluster_range[0] + silhouette_scores.index(max(silhouette_scores))
|
275 |
|
276 |
# Perform final clustering with optimal number of clusters
|
277 |
-
kmeans = KMeans(n_clusters=optimal_n_clusters, random_state=42)
|
278 |
cluster_labels = kmeans.fit_predict(X)
|
279 |
|
280 |
# # BERTopic approach (commented out)
|
@@ -312,14 +312,14 @@ def extract_problem_domains(df,
|
|
312 |
|
313 |
try:
|
314 |
center = kmeans.cluster_centers_[i]
|
315 |
-
console_messages.append(f"Processing cluster {i}")
|
316 |
-
console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
317 |
|
318 |
top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
319 |
-
console_messages.append(f"Top word indices: {top_word_indices}")
|
320 |
|
321 |
top_words = [feature_names[index] for index in top_word_indices]
|
322 |
-
console_messages.append(f"Top words: {top_words}")
|
323 |
|
324 |
cluster_representations[i] = top_words
|
325 |
except Exception as e:
|
@@ -327,15 +327,15 @@ def extract_problem_domains(df,
|
|
327 |
console_messages.append(f"Center: {center}")
|
328 |
|
329 |
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
|
334 |
# Map cluster labels to representative words
|
335 |
df["Problem_Cluster"] = cluster_labels
|
336 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
337 |
|
338 |
-
console_messages.append("Returning from Problem Domain Extraction function.")
|
|
|
339 |
return df, optimal_n_clusters
|
340 |
|
341 |
|
|
|
274 |
optimal_n_clusters = cluster_range[0] + silhouette_scores.index(max(silhouette_scores))
|
275 |
|
276 |
# Perform final clustering with optimal number of clusters
|
277 |
+
kmeans = KMeans(n_clusters=optimal_n_clusters) #, random_state=42)
|
278 |
cluster_labels = kmeans.fit_predict(X)
|
279 |
|
280 |
# # BERTopic approach (commented out)
|
|
|
312 |
|
313 |
try:
|
314 |
center = kmeans.cluster_centers_[i]
|
315 |
+
# console_messages.append(f"Processing cluster {i}")
|
316 |
+
# console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
317 |
|
318 |
top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
319 |
+
# console_messages.append(f"Top word indices: {top_word_indices}")
|
320 |
|
321 |
top_words = [feature_names[index] for index in top_word_indices]
|
322 |
+
# console_messages.append(f"Top words: {top_words}")
|
323 |
|
324 |
cluster_representations[i] = top_words
|
325 |
except Exception as e:
|
|
|
327 |
console_messages.append(f"Center: {center}")
|
328 |
|
329 |
|
330 |
+
console_messages.append(f"Number of clusters: {optimal_n_clusters}")
|
331 |
+
console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
|
|
|
332 |
|
333 |
# Map cluster labels to representative words
|
334 |
df["Problem_Cluster"] = cluster_labels
|
335 |
df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
336 |
|
337 |
+
# console_messages.append("Returning from Problem Domain Extraction function.")
|
338 |
+
console_messages.append("Problem Domain Extraction completed.")
|
339 |
return df, optimal_n_clusters
|
340 |
|
341 |
|