Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -314,7 +314,7 @@ def extract_location_clusters(df,
|
|
314 |
|
315 |
|
316 |
|
317 |
-
|
318 |
|
319 |
|
320 |
# def nlp_pipeline(original_df):
|
@@ -323,7 +323,6 @@ def nlp_pipeline(original_df):
|
|
323 |
|
324 |
# Data Preprocessing
|
325 |
processed_df = data_pre_processing(original_df) # merged_dataset
|
326 |
-
|
327 |
|
328 |
# Starting the Pipeline for Domain Extraction
|
329 |
console_messages.append("Executing Text processing function for Domain identification")
|
@@ -334,44 +333,31 @@ def nlp_pipeline(original_df):
|
|
334 |
# processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
|
335 |
# Drop rows where 'Processed_ProblemDescription_forDomainExtraction' contains empty arrays
|
336 |
processed_df = processed_df[processed_df['Processed_ProblemDescription_forDomainExtraction'].apply(lambda x: len(x) > 0)]
|
337 |
-
|
338 |
-
|
339 |
|
340 |
# Domain Clustering
|
341 |
try:
|
342 |
-
|
343 |
-
|
344 |
-
# print(result_df.head())
|
345 |
-
# console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
|
346 |
-
|
347 |
-
console_messages.append("NLP pipeline completed.")
|
348 |
-
return domain_df
|
349 |
except Exception as e:
|
350 |
-
# print(f"Error in extract_problem_domains: {e}")
|
351 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
352 |
-
|
353 |
-
# return domain_df
|
354 |
|
355 |
|
356 |
-
|
357 |
-
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
358 |
-
|
359 |
-
console_messages.append("Starting NLP pipeline for location extraction...")
|
360 |
-
|
361 |
# Apply the text_processing_for_location function to the DataFrame
|
362 |
processed_df['Processed_LocationText_forClustering'] = processed_df['Problem_Description'].apply(text_processing_for_location)
|
363 |
|
364 |
# Location Clustering
|
365 |
try:
|
366 |
-
|
367 |
-
console_messages.append("
|
368 |
-
return location_df
|
369 |
except Exception as e:
|
370 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
371 |
-
|
372 |
-
|
373 |
|
374 |
|
|
|
|
|
375 |
|
376 |
|
377 |
|
|
|
314 |
|
315 |
|
316 |
|
317 |
+
import copy
|
318 |
|
319 |
|
320 |
# def nlp_pipeline(original_df):
|
|
|
323 |
|
324 |
# Data Preprocessing
|
325 |
processed_df = data_pre_processing(original_df) # merged_dataset
|
|
|
326 |
|
327 |
# Starting the Pipeline for Domain Extraction
|
328 |
console_messages.append("Executing Text processing function for Domain identification")
|
|
|
333 |
# processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
|
334 |
# Drop rows where 'Processed_ProblemDescription_forDomainExtraction' contains empty arrays
|
335 |
processed_df = processed_df[processed_df['Processed_ProblemDescription_forDomainExtraction'].apply(lambda x: len(x) > 0)]
|
|
|
|
|
336 |
|
337 |
# Domain Clustering
|
338 |
try:
|
339 |
+
processed_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
340 |
+
console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
|
|
|
|
|
|
|
|
|
|
|
341 |
except Exception as e:
|
|
|
342 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
343 |
+
console_messages.append("NLP pipeline for Problem Domain extraction completed.")
|
|
|
344 |
|
345 |
|
346 |
+
console_messages.append("Starting NLP pipeline for Location extraction with text processing.")
|
|
|
|
|
|
|
|
|
347 |
# Apply the text_processing_for_location function to the DataFrame
|
348 |
processed_df['Processed_LocationText_forClustering'] = processed_df['Problem_Description'].apply(text_processing_for_location)
|
349 |
|
350 |
# Location Clustering
|
351 |
try:
|
352 |
+
processed_df, optimal_n_clusters = extract_location_clusters(processed_df)
|
353 |
+
console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
|
|
|
354 |
except Exception as e:
|
355 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
356 |
+
console_messages.append("NLP pipeline for location extraction completed.")
|
|
|
357 |
|
358 |
|
359 |
+
console_messages.append("NLP pipeline completed.")
|
360 |
+
return processed_df
|
361 |
|
362 |
|
363 |
|