SantanuBanerjee committed on
Commit
ba1e210
·
verified ·
1 Parent(s): cf3adb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -274,7 +274,7 @@ def extract_problem_domains(df,
274
  optimal_n_clusters = cluster_range[0] + silhouette_scores.index(max(silhouette_scores))
275
 
276
  # Perform final clustering with optimal number of clusters
277
- kmeans = KMeans(n_clusters=optimal_n_clusters, random_state=42)
278
  cluster_labels = kmeans.fit_predict(X)
279
 
280
  # # BERTopic approach (commented out)
@@ -312,14 +312,14 @@ def extract_problem_domains(df,
312
 
313
  try:
314
  center = kmeans.cluster_centers_[i]
315
- console_messages.append(f"Processing cluster {i}")
316
- console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
317
 
318
  top_word_indices = center.argsort()[-top_words:][::-1].tolist()
319
- console_messages.append(f"Top word indices: {top_word_indices}")
320
 
321
  top_words = [feature_names[index] for index in top_word_indices]
322
- console_messages.append(f"Top words: {top_words}")
323
 
324
  cluster_representations[i] = top_words
325
  except Exception as e:
@@ -327,15 +327,15 @@ def extract_problem_domains(df,
327
  console_messages.append(f"Center: {center}")
328
 
329
 
330
-
331
-
332
-
333
 
334
  # Map cluster labels to representative words
335
  df["Problem_Cluster"] = cluster_labels
336
  df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
337
 
338
- console_messages.append("Returning from Problem Domain Extraction function.")
 
339
  return df, optimal_n_clusters
340
 
341
 
 
274
  optimal_n_clusters = cluster_range[0] + silhouette_scores.index(max(silhouette_scores))
275
 
276
  # Perform final clustering with optimal number of clusters
277
+ kmeans = KMeans(n_clusters=optimal_n_clusters) #, random_state=42)
278
  cluster_labels = kmeans.fit_predict(X)
279
 
280
  # # BERTopic approach (commented out)
 
312
 
313
  try:
314
  center = kmeans.cluster_centers_[i]
315
+ # console_messages.append(f"Processing cluster {i}")
316
+ # console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
317
 
318
  top_word_indices = center.argsort()[-top_words:][::-1].tolist()
319
+ # console_messages.append(f"Top word indices: {top_word_indices}")
320
 
321
  top_words = [feature_names[index] for index in top_word_indices]
322
+ # console_messages.append(f"Top words: {top_words}")
323
 
324
  cluster_representations[i] = top_words
325
  except Exception as e:
 
327
  console_messages.append(f"Center: {center}")
328
 
329
 
330
+ console_messages.append(f"Number of clusters: {optimal_n_clusters}")
331
+ console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
 
332
 
333
  # Map cluster labels to representative words
334
  df["Problem_Cluster"] = cluster_labels
335
  df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
336
 
337
+ # console_messages.append("Returning from Problem Domain Extraction function.")
338
+ console_messages.append("Problem Domain Extraction completed.")
339
  return df, optimal_n_clusters
340
 
341