Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

SantanuBanerjee committed on Aug 5, 2024

Commit

4259c64

verified ·

1 Parent(s): 5130254

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -199,7 +199,7 @@ from collections import Counter
 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
-                            cluster_range=(5, 15),
                             top_words=10):
     console_messages.append("Extracting Problem Domains...")
@@ -280,9 +280,14 @@ def text_processing_for_location(text):
 def extract_location_clusters(df,
-                              text_column='Processed_LocationText_forClustering',
-                              cluster_range=(3, 10),
                               top_words=5):
     console_messages.append("Extracting Location Clusters...")
     # Sentence Transformers approach for embeddings
@@ -313,7 +318,8 @@ def extract_location_clusters(df,
     # Map cluster labels to representative words
     df["Location_Cluster"] = cluster_labels
     df['Location_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
     console_messages.append("Location Clustering completed.")
     return df, optimal_n_clusters

 def extract_problem_domains(df,
                             text_column='Processed_ProblemDescription_forDomainExtraction',
+                            cluster_range=(5, 25),
                             top_words=10):
     console_messages.append("Extracting Problem Domains...")
 def extract_location_clusters(df,
+                              text_column1='Processed_LocationText_forClustering', # Extracted through NLP
+                              text_column2='Geographical_Location', # User Input
+                              cluster_range=(5, 25),
                               top_words=5):
+    # Combine the two text columns
+    text_column = "Combined_Location_Text"
+    df[text_column] = df[text_column1] + ' ' + df[text_column2]
     console_messages.append("Extracting Location Clusters...")
     # Sentence Transformers approach for embeddings
     # Map cluster labels to representative words
     df["Location_Cluster"] = cluster_labels
     df['Location_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
+    df = df.drop(text_column, axis=1)
     console_messages.append("Location Clustering completed.")
     return df, optimal_n_clusters