Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -346,7 +346,6 @@ def extract_problem_domains(df,
|
|
346 |
|
347 |
# console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
348 |
# console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
349 |
-
|
350 |
|
351 |
# # top_word_indices = center.argsort()[-top_words:][::-1]
|
352 |
# top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
@@ -354,24 +353,33 @@ def extract_problem_domains(df,
|
|
354 |
# top_words = [feature_names[index] for index in top_word_indices]
|
355 |
# cluster_representations[i] = top_words
|
356 |
|
357 |
-
|
358 |
try:
|
359 |
center = kmeans.cluster_centers_[i]
|
360 |
# console_messages.append(f"Processing cluster {i}")
|
361 |
# console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
362 |
|
363 |
-
|
364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
|
366 |
-
top_words = [feature_names[index] for index in top_word_indices]
|
367 |
-
# console_messages.append(f"Top words: {top_words}")
|
368 |
|
369 |
-
cluster_representations[i] = top_words
|
370 |
except Exception as e:
|
371 |
console_messages.append(f"Error processing cluster {i}: {str(e)}")
|
372 |
console_messages.append(f"Center: {center}")
|
373 |
|
374 |
|
|
|
|
|
|
|
|
|
|
|
375 |
console_messages.append(f"Number of clusters: {optimal_n_clusters}")
|
376 |
console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
|
377 |
|
@@ -390,12 +398,7 @@ def extract_problem_domains(df,
|
|
390 |
|
391 |
|
392 |
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
|
400 |
|
401 |
# def nlp_pipeline(original_df):
|
|
|
346 |
|
347 |
# console_messages.append(f"top_words: {top_words}, type: {type(top_words)}")
|
348 |
# console_messages.append(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
|
|
|
349 |
|
350 |
# # top_word_indices = center.argsort()[-top_words:][::-1]
|
351 |
# top_word_indices = center.argsort()[-top_words:][::-1].tolist() # Indexes of top words
|
|
|
353 |
# top_words = [feature_names[index] for index in top_word_indices]
|
354 |
# cluster_representations[i] = top_words
|
355 |
|
|
|
356 |
try:
|
357 |
center = kmeans.cluster_centers_[i]
|
358 |
# console_messages.append(f"Processing cluster {i}")
|
359 |
# console_messages.append(f"Center shape: {center.shape}, type: {type(center)}")
|
360 |
|
361 |
+
if isinstance(center, np.ndarray) and center.ndim == 1:
|
362 |
+
top_word_indices = center.argsort()[-top_words:][::-1]
|
363 |
+
# top_word_indices = center.argsort()[-top_words:][::-1].tolist()
|
364 |
+
|
365 |
+
console_messages.append(f"Top word indices for cluster {i}: {top_word_indices}")
|
366 |
+
top_words = [feature_names[index] for index in top_word_indices]
|
367 |
+
console_messages.append(f"Top words: {top_words}")
|
368 |
+
cluster_representations[i] = top_words
|
369 |
+
else:
|
370 |
+
console_messages.append(f"Error: Cluster center is not a 1D array for cluster {i}")
|
371 |
|
|
|
|
|
372 |
|
|
|
373 |
except Exception as e:
|
374 |
console_messages.append(f"Error processing cluster {i}: {str(e)}")
|
375 |
console_messages.append(f"Center: {center}")
|
376 |
|
377 |
|
378 |
+
try:
|
379 |
+
center = kmeans.cluster_centers_[i]
|
380 |
+
print(f"Center for cluster {i}: {center}")
|
381 |
+
|
382 |
+
|
383 |
console_messages.append(f"Number of clusters: {optimal_n_clusters}")
|
384 |
console_messages.append(f"Sample cluster words: {cluster_representations[0][:5]}...")
|
385 |
|
|
|
398 |
|
399 |
|
400 |
|
401 |
+
|
|
|
|
|
|
|
|
|
|
|
402 |
|
403 |
|
404 |
# def nlp_pipeline(original_df):
|