SantanuBanerjee committed on
Commit
3a92d71
·
verified ·
1 Parent(s): 1f2dca3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -229,12 +229,13 @@ def extract_problem_domains(df,
229
  text_column='Problem_Description',
230
  cluster_range=(10, 50),
231
  top_words=17,
232
- # method='sentence_transformers'
233
- method='tfidf_kmeans'
 
234
  ):
235
 
236
 
237
-
238
 
239
  if method == 'sentence_transformers':
240
  # Sentence Transformers approach
@@ -294,6 +295,12 @@ def extract_problem_domains(df,
294
  for i in range(optimal_n_clusters):
295
  center = kmeans.cluster_centers_[i]
296
 
 
 
 
 
 
 
297
 
298
 
299
  # top_word_indices = center.argsort()[-top_words:][::-1]
@@ -306,6 +313,7 @@ def extract_problem_domains(df,
306
  df["Problem_Cluster"] = cluster_labels
307
  df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
308
 
 
309
  return df, optimal_n_clusters
310
 
311
 
@@ -338,7 +346,7 @@ def nlp_pipeline(original_df, console_messages):
338
 
339
  # Domain Clustering
340
  try:
341
- domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
342
  # print(f"Optimal clusters: {optimal_clusters}")
343
  # print(result_df.head())
344
  # console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
 
229
  text_column='Problem_Description',
230
  cluster_range=(10, 50),
231
  top_words=17,
232
+ # method='sentence_transformers',
233
+ method='tfidf_kmeans',
234
+ console_messages
235
  ):
236
 
237
 
238
+ console_messages.append("Extracting Problem Domains...")
239
 
240
  if method == 'sentence_transformers':
241
  # Sentence Transformers approach
 
295
  for i in range(optimal_n_clusters):
296
  center = kmeans.cluster_centers_[i]
297
 
298
+ # print(f"top_words: {top_words}, type: {type(top_words)}")
299
+ # print(f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}")
300
+
301
+ console_messages.append(f"top_words: {top_words}, type: {type(top_words)}",
302
+ f"center.argsort(): {center.argsort()}, type: {type(center.argsort())}"
303
+ )
304
 
305
 
306
  # top_word_indices = center.argsort()[-top_words:][::-1]
 
313
  df["Problem_Cluster"] = cluster_labels
314
  df['Problem_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
315
 
316
+ console_messages.append("Returning from Problem Domain Extraction function.")
317
  return df, optimal_n_clusters
318
 
319
 
 
346
 
347
  # Domain Clustering
348
  try:
349
+ domain_df, optimal_n_clusters = extract_problem_domains(processed_df, console_messages)
350
  # print(f"Optimal clusters: {optimal_clusters}")
351
  # print(result_df.head())
352
  # console_messages.append(f"Optimal clusters: {optimal_n_clusters}")