SantanuBanerjee committed on
Commit
a55daf9
·
verified ·
1 Parent(s): 3a814a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -234,7 +234,7 @@ def extract_problem_domains(df,
234
 
235
  # console_messages.append("Returning from Problem Domain Extraction function.")
236
  console_messages.append("Problem Domain Extraction completed.")
237
- return df, optimal_n_clusters
238
 
239
 
240
 
@@ -321,7 +321,7 @@ def extract_location_clusters(df,
321
 
322
  df = df.drop(text_column, axis=1)
323
  console_messages.append("Location Clustering completed.")
324
- return df, optimal_n_clusters
325
 
326
 
327
 
@@ -434,7 +434,7 @@ def nlp_pipeline(original_df):
434
 
435
  # Domain Clustering
436
  try:
437
- processed_df, optimal_n_clusters = extract_problem_domains(processed_df)
438
  console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
439
  except Exception as e:
440
  console_messages.append(f"Error in extract_problem_domains: {str(e)}")
@@ -449,7 +449,7 @@ def nlp_pipeline(original_df):
449
 
450
  # Location Clustering
451
  try:
452
- processed_df, optimal_n_clusters = extract_location_clusters(processed_df)
453
  console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
454
  except Exception as e:
455
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
@@ -462,13 +462,13 @@ def nlp_pipeline(original_df):
462
  # Create cluster dataframes
463
  budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
464
 
465
- # Generate project proposals
466
- location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
467
- problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
468
  project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
469
 
470
  console_messages.append("NLP pipeline completed.")
471
- return processed_df, budget_cluster_df, problem_cluster_df, project_proposals
472
 
473
 
474
 
@@ -488,11 +488,16 @@ def process_excel(file):
488
  try:
489
  # Process the DataFrame
490
  console_messages.append("Processing the DataFrame...")
491
- processed_df, budget_cluster_df, problem_cluster_df, project_proposals = nlp_pipeline(df)
492
 
493
  output_filename = "OutPut_PPs.xlsx"
494
  with pd.ExcelWriter(output_filename) as writer:
495
- project_proposals.to_excel(writer, sheet_name='Project_Proposals', index=False)
 
 
 
 
 
496
  budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
497
  problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
498
  processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
 
234
 
235
  # console_messages.append("Returning from Problem Domain Extraction function.")
236
  console_messages.append("Problem Domain Extraction completed.")
237
+ return df, optimal_n_clusters, cluster_representations
238
 
239
 
240
 
 
321
 
322
  df = df.drop(text_column, axis=1)
323
  console_messages.append("Location Clustering completed.")
324
+ return df, optimal_n_clusters, cluster_representations
325
 
326
 
327
 
 
434
 
435
  # Domain Clustering
436
  try:
437
+ processed_df, optimal_n_clusters, problem_clusters = extract_problem_domains(processed_df)
438
  console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
439
  except Exception as e:
440
  console_messages.append(f"Error in extract_problem_domains: {str(e)}")
 
449
 
450
  # Location Clustering
451
  try:
452
+ processed_df, optimal_n_clusters, location_clusters = extract_location_clusters(processed_df)
453
  console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
454
  except Exception as e:
455
  console_messages.append(f"Error in extract_location_clusters: {str(e)}")
 
462
  # Create cluster dataframes
463
  budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
464
 
465
+ # # Generate project proposals
466
+ # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
467
+ # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
468
  project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
469
 
470
  console_messages.append("NLP pipeline completed.")
471
+ return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
472
 
473
 
474
 
 
488
  try:
489
  # Process the DataFrame
490
  console_messages.append("Processing the DataFrame...")
491
+ processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
492
 
493
  output_filename = "OutPut_PPs.xlsx"
494
  with pd.ExcelWriter(output_filename) as writer:
495
+ # Convert project_proposals dictionary to DataFrame
496
+ project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
497
+ project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
498
+ project_proposals_df.reset_index(inplace=True)
499
+ project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
500
+
501
  budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
502
  problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
503
  processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)