SantanuBanerjee committed on
Commit
4259c64
·
verified ·
1 Parent(s): 5130254

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -199,7 +199,7 @@ from collections import Counter
199
 
200
  def extract_problem_domains(df,
201
  text_column='Processed_ProblemDescription_forDomainExtraction',
202
- cluster_range=(5, 15),
203
  top_words=10):
204
  console_messages.append("Extracting Problem Domains...")
205
 
@@ -280,9 +280,14 @@ def text_processing_for_location(text):
280
 
281
 
282
  def extract_location_clusters(df,
283
- text_column='Processed_LocationText_forClustering',
284
- cluster_range=(3, 10),
 
285
  top_words=5):
 
 
 
 
286
  console_messages.append("Extracting Location Clusters...")
287
 
288
  # Sentence Transformers approach for embeddings
@@ -313,7 +318,8 @@ def extract_location_clusters(df,
313
  # Map cluster labels to representative words
314
  df["Location_Cluster"] = cluster_labels
315
  df['Location_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
316
-
 
317
  console_messages.append("Location Clustering completed.")
318
  return df, optimal_n_clusters
319
 
 
199
 
200
  def extract_problem_domains(df,
201
  text_column='Processed_ProblemDescription_forDomainExtraction',
202
+ cluster_range=(5, 25),
203
  top_words=10):
204
  console_messages.append("Extracting Problem Domains...")
205
 
 
280
 
281
 
282
  def extract_location_clusters(df,
283
+ text_column1='Processed_LocationText_forClustering', # Extracted through NLP
284
+ text_column2='Geographical_Location', # User Input
285
+ cluster_range=(5, 25),
286
  top_words=5):
287
+ # Combine the two text columns
288
+ text_column = "Combined_Location_Text"
289
+ df[text_column] = df[text_column1] + ' ' + df[text_column2]
290
+
291
  console_messages.append("Extracting Location Clusters...")
292
 
293
  # Sentence Transformers approach for embeddings
 
318
  # Map cluster labels to representative words
319
  df["Location_Cluster"] = cluster_labels
320
  df['Location_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
321
+
322
+ df = df.drop(text_column, axis=1)
323
  console_messages.append("Location Clustering completed.")
324
  return df, optimal_n_clusters
325