Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -199,7 +199,7 @@ from collections import Counter
|
|
199 |
|
200 |
def extract_problem_domains(df,
|
201 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
202 |
-
cluster_range=(5,
|
203 |
top_words=10):
|
204 |
console_messages.append("Extracting Problem Domains...")
|
205 |
|
@@ -280,9 +280,14 @@ def text_processing_for_location(text):
|
|
280 |
|
281 |
|
282 |
def extract_location_clusters(df,
|
283 |
-
|
284 |
-
|
|
|
285 |
top_words=5):
|
|
|
|
|
|
|
|
|
286 |
console_messages.append("Extracting Location Clusters...")
|
287 |
|
288 |
# Sentence Transformers approach for embeddings
|
@@ -313,7 +318,8 @@ def extract_location_clusters(df,
|
|
313 |
# Map cluster labels to representative words
|
314 |
df["Location_Cluster"] = cluster_labels
|
315 |
df['Location_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
316 |
-
|
|
|
317 |
console_messages.append("Location Clustering completed.")
|
318 |
return df, optimal_n_clusters
|
319 |
|
|
|
199 |
|
200 |
def extract_problem_domains(df,
|
201 |
text_column='Processed_ProblemDescription_forDomainExtraction',
|
202 |
+
cluster_range=(5, 25),
|
203 |
top_words=10):
|
204 |
console_messages.append("Extracting Problem Domains...")
|
205 |
|
|
|
280 |
|
281 |
|
282 |
def extract_location_clusters(df,
|
283 |
+
text_column1='Processed_LocationText_forClustering', # Extracted through NLP
|
284 |
+
text_column2='Geographical_Location', # User Input
|
285 |
+
cluster_range=(5, 25),
|
286 |
top_words=5):
|
287 |
+
# Combine the two text columns
|
288 |
+
text_column = "Combined_Location_Text"
|
289 |
+
df[text_column] = df[text_column1] + ' ' + df[text_column2]
|
290 |
+
|
291 |
console_messages.append("Extracting Location Clusters...")
|
292 |
|
293 |
# Sentence Transformers approach for embeddings
|
|
|
318 |
# Map cluster labels to representative words
|
319 |
df["Location_Cluster"] = cluster_labels
|
320 |
df['Location_Category_Words'] = [cluster_representations[label] for label in cluster_labels]
|
321 |
+
|
322 |
+
df = df.drop(text_column, axis=1)
|
323 |
console_messages.append("Location Clustering completed.")
|
324 |
return df, optimal_n_clusters
|
325 |
|