Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Files Community

SantanuBanerjee committed on Aug 6, 2024

Commit

a55daf9

verified ·

1 Parent(s): 3a814a1

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -10

app.py CHANGED Viewed

@@ -234,7 +234,7 @@ def extract_problem_domains(df,
     # console_messages.append("Returning from Problem Domain Extraction function.")
     console_messages.append("Problem Domain Extraction completed.")
-    return df, optimal_n_clusters
@@ -321,7 +321,7 @@ def extract_location_clusters(df,
     df = df.drop(text_column, axis=1)
     console_messages.append("Location Clustering completed.")
-    return df, optimal_n_clusters
@@ -434,7 +434,7 @@ def nlp_pipeline(original_df):
     # Domain Clustering
     try:
-        processed_df, optimal_n_clusters = extract_problem_domains(processed_df)
         console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
     except Exception as e:
         console_messages.append(f"Error in extract_problem_domains: {str(e)}")
@@ -449,7 +449,7 @@ def nlp_pipeline(original_df):
     # Location Clustering
     try:
-        processed_df, optimal_n_clusters = extract_location_clusters(processed_df)
         console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
     except Exception as e:
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
@@ -462,13 +462,13 @@ def nlp_pipeline(original_df):
     # Create cluster dataframes
     budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
-    # Generate project proposals
-    location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
-    problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
     project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
     console_messages.append("NLP pipeline completed.")
-    return processed_df, budget_cluster_df, problem_cluster_df, project_proposals
@@ -488,11 +488,16 @@ def process_excel(file):
     try:
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
-        processed_df, budget_cluster_df, problem_cluster_df, project_proposals = nlp_pipeline(df)
         output_filename = "OutPut_PPs.xlsx"
         with pd.ExcelWriter(output_filename) as writer:
-            project_proposals.to_excel(writer, sheet_name='Project_Proposals', index=False)
             budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
             problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
             processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)

     # console_messages.append("Returning from Problem Domain Extraction function.")
     console_messages.append("Problem Domain Extraction completed.")
+    return df, optimal_n_clusters, cluster_representations
     df = df.drop(text_column, axis=1)
     console_messages.append("Location Clustering completed.")
+    return df, optimal_n_clusters, cluster_representations
     # Domain Clustering
     try:
+        processed_df, optimal_n_clusters, problem_clusters = extract_problem_domains(processed_df)
         console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
     except Exception as e:
         console_messages.append(f"Error in extract_problem_domains: {str(e)}")
     # Location Clustering
     try:
+        processed_df, optimal_n_clusters, location_clusters = extract_location_clusters(processed_df)
         console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
     except Exception as e:
         console_messages.append(f"Error in extract_location_clusters: {str(e)}")
     # Create cluster dataframes
     budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
+    # # Generate project proposals
+    # location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
+    # problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
     project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
     console_messages.append("NLP pipeline completed.")
+    return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
     try:
         # Process the DataFrame
         console_messages.append("Processing the DataFrame...")
+        processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
         output_filename = "OutPut_PPs.xlsx"
         with pd.ExcelWriter(output_filename) as writer:
+            # Convert project_proposals dictionary to DataFrame
+            project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
+            project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
+            project_proposals_df.reset_index(inplace=True)
+            project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
             budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
             problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
             processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)