SantanuBanerjee committed on
Commit
86142db
·
verified ·
1 Parent(s): 5a0f2dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -24
app.py CHANGED
@@ -314,7 +314,7 @@ def extract_location_clusters(df,
314
 
315
 
316
 
317
-
318
 
319
 
320
  # def nlp_pipeline(original_df):
@@ -323,7 +323,6 @@ def nlp_pipeline(original_df):
323
 
324
  # Data Preprocessing
325
  processed_df = data_pre_processing(original_df) # merged_dataset
326
-
327
 
328
  # Starting the Pipeline for Domain Extraction
329
  console_messages.append("Executing Text processing function for Domain identification")
@@ -334,44 +333,31 @@ def nlp_pipeline(original_df):
334
  # processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
335
  # Drop rows where 'Processed_ProblemDescription_forDomainExtraction' contains empty arrays
336
  processed_df = processed_df[processed_df['Processed_ProblemDescription_forDomainExtraction'].apply(lambda x: len(x) > 0)]
337
-
338
-
339
 
340
  # Domain Clustering
341
  try:
342
- domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
343
- # print(f"Optimal clusters: {optimal_clusters}")
344
- # print(result_df.head())
345
- # console_messages.append(f"Optimal clusters: {optimal_n_clusters}")
346
-
347
- console_messages.append("NLP pipeline completed.")
348
- return domain_df
349
  except Exception as e:
350
- # print(f"Error in extract_problem_domains: {e}")
351
  console_messages.append(f"Error in extract_problem_domains: {str(e)}")
352
- return processed_df
353
- # return domain_df
354
 
355
 
356
- # problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
357
- # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
358
-
359
- console_messages.append("Starting NLP pipeline for location extraction...")
360
-
361
  # Apply the text_processing_for_location function to the DataFrame
362
  processed_df['Processed_LocationText_forClustering'] = processed_df['Problem_Description'].apply(text_processing_for_location)
363
 
364
  # Location Clustering
365
  try:
366
- location_df, optimal_n_clusters = extract_location_clusters(processed_df)
367
- console_messages.append("NLP pipeline for location extraction completed.")
368
- return location_df
369
  except Exception as e:
370
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
371
- return processed_df
372
-
373
 
374
 
 
 
375
 
376
 
377
 
 
314
 
315
 
316
 
317
+ import copy
318
 
319
 
320
  # def nlp_pipeline(original_df):
 
323
 
324
  # Data Preprocessing
325
  processed_df = data_pre_processing(original_df) # merged_dataset
 
326
 
327
  # Starting the Pipeline for Domain Extraction
328
  console_messages.append("Executing Text processing function for Domain identification")
 
333
  # processed_df = processed_df.dropna(subset=['Processed_ProblemDescription_forDomainExtraction'], axis=0)
334
  # Drop rows where 'Processed_ProblemDescription_forDomainExtraction' contains empty arrays
335
  processed_df = processed_df[processed_df['Processed_ProblemDescription_forDomainExtraction'].apply(lambda x: len(x) > 0)]
 
 
336
 
337
  # Domain Clustering
338
  try:
339
+ processed_df, optimal_n_clusters = extract_problem_domains(processed_df)
340
+ console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
 
 
 
 
 
341
  except Exception as e:
 
342
  console_messages.append(f"Error in extract_problem_domains: {str(e)}")
343
+ console_messages.append("NLP pipeline for Problem Domain extraction completed.")
 
344
 
345
 
346
+ console_messages.append("Starting NLP pipeline for Location extraction with text processing.")
 
 
 
 
347
  # Apply the text_processing_for_location function to the DataFrame
348
  processed_df['Processed_LocationText_forClustering'] = processed_df['Problem_Description'].apply(text_processing_for_location)
349
 
350
  # Location Clustering
351
  try:
352
+ processed_df, optimal_n_clusters = extract_location_clusters(processed_df)
353
+ console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
 
354
  except Exception as e:
355
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
356
+ console_messages.append("NLP pipeline for location extraction completed.")
 
357
 
358
 
359
+ console_messages.append("NLP pipeline completed.")
360
+ return processed_df
361
 
362
 
363