Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -294,20 +294,42 @@ def extract_problem_domains(df,
|
|
294 |
feature_names = vectorizer.get_feature_names_out()
|
295 |
cluster_representations = {}
|
296 |
for i in range(optimal_n_clusters):
|
297 |
-
center = kmeans.cluster_centers_[i]
|
298 |
|
299 |
-
# print(f"top_words: {top_words}, type: {type(top_words)}")
|
300 |
-
# print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
301 |
|
302 |
-
console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
303 |
-
console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
304 |
|
305 |
|
306 |
-
# top_word_indices = center.argsort()[-top_words:][::-1]
|
307 |
-
top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
308 |
|
309 |
-
top_words = [feature_names[index] for index in top_word_indices]
|
310 |
-
cluster_representations[i] = top_words
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
|
312 |
# Map cluster labels to representative words
|
313 |
df["Problem_Cluster"] = cluster_labels
|
|
|
294 |
feature_names = vectorizer.get_feature_names_out()
|
295 |
cluster_representations = {}
|
296 |
for i in range(optimal_n_clusters):
|
297 |
+
# center = kmeans.cluster_centers_[i]
|
298 |
|
299 |
+
# # print(f"top_words: {top_words}, type: {type(top_words)}")
|
300 |
+
# # print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
301 |
|
302 |
+
# console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
303 |
+
# console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
304 |
|
305 |
|
306 |
+
# # top_word_indices = center.argsort()[-top_words:][::-1]
|
307 |
+
# top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
308 |
|
309 |
+
# top_words = [feature_names[index] for index in top_word_indices]
|
310 |
+
# cluster_representations[i] = top_words
|
311 |
+
|
312 |
+
|
313 |
+
try:
|
314 |
+
center = kmeans.cluster_centers_[i]
|
315 |
+
console_messages.append(f"Processing cluster {i}")
|
316 |
+
console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
317 |
+
|
318 |
+
top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
319 |
+
console_messages.append(f"Top word indices: {top_word_indices}")
|
320 |
+
|
321 |
+
top_words = [feature_names[index] for index in top_word_indices]
|
322 |
+
console_messages.append(f"Top words: {top_words}")
|
323 |
+
|
324 |
+
cluster_representations[i] = top_words
|
325 |
+
except Exception as e:
|
326 |
+
console_messages.append(f"Error processing cluster {i}: {str(e)}")
|
327 |
+
console_messages.append(f"Center: {center}")
|
328 |
+
|
329 |
+
|
330 |
+
|
331 |
+
|
332 |
+
|
333 |
|
334 |
# Map cluster labels to representative words
|
335 |
df["Problem_Cluster"] = cluster_labels
|