SantanuBanerjee committed on
Commit
cf3adb9
·
verified ·
1 Parent(s): ec47bcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -294,20 +294,42 @@ def extract_problem_domains(df,
294
  feature_names = vectorizer.get_feature_names_out()
295
  cluster_representations = {}
296
  for i in range(optimal_n_clusters):
297
- center = kmeans.cluster_centers_[i]
298
 
299
- # print(f"top_words: {top_words}, type: {type(top_words)}")
300
- # print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
301
 
302
- console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
303
- console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
304
 
305
 
306
- # top_word_indices = center.argsort()[-top_words:][::-1]
307
- top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
308
 
309
- top_words = [feature_names[index] for index in top_word_indices]
310
- cluster_representations[i] = top_words
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
  # Map cluster labels to representative words
313
  df["Problem_Cluster"] = cluster_labels
 
294
  feature_names = vectorizer.get_feature_names_out()
295
  cluster_representations = {}
296
  for i in range(optimal_n_clusters):
297
+ # center = kmeans.cluster_centers_[i]
298
 
299
+ # # print(f"top_words: {top_words}, type: {type(top_words)}")
300
+ # # print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
301
 
302
+ # console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
303
+ # console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
304
 
305
 
306
+ # # top_word_indices = center.argsort()[-top_words:][::-1]
307
+ # top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
308
 
309
+ # top_words = [feature_names[index] for index in top_word_indices]
310
+ # cluster_representations[i] = top_words
311
+
312
+
313
+ try:
314
+ center = kmeans.cluster_centers_[i]
315
+ console_messages.append(f"Processing cluster {i}")
316
+ console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
317
+
318
+ top_word_indices = center.argsort()[-top_words:][::-1].tolist()
319
+ console_messages.append(f"Top word indices: {top_word_indices}")
320
+
321
+ top_words = [feature_names[index] for index in top_word_indices]
322
+ console_messages.append(f"Top words: {top_words}")
323
+
324
+ cluster_representations[i] = top_words
325
+ except Exception as e:
326
+ console_messages.append(f"Error processing cluster {i}: {str(e)}")
327
+ console_messages.append(f"Center: {center}")
328
+
329
+
330
+
331
+
332
+
333
 
334
  # Map cluster labels to representative words
335
  df["Problem_Cluster"] = cluster_labels