Spaces:

SantanuBanerjee
/

TaxDirection

Sleeping

App Files Files Community

SantanuBanerjee committed on Aug 4, 2024

Commit

e636253

verified ·

1 Parent(s): 0c700cf

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -7

app.py CHANGED Viewed

@@ -327,9 +327,18 @@ def nlp_pipeline(original_df):
     # Starting the Pipeline for Domain Extraction
     # Apply the text_processing_for_domain function to the DataFrame
     processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
     # Domain Clustering
     domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
     # problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
     # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
@@ -338,30 +347,37 @@ def nlp_pipeline(original_df):
     # return processed_df
-    return domain_df
 def process_excel(file):
     try:
         # Ensure the file path is correct
         file_path = file.name if hasattr(file, 'name') else file
         # Read the Excel file
         df = pd.read_excel(file_path)
         # Process the DataFrame
-        result_df = nlp_pipeline(df)
         # output_file = "Output_ProjectProposals.xlsx"
         output_file = "Output_Proposals.xlsx"
         result_df.to_excel(output_file, index=False)
-        return output_file, "Santanu Banerjee"  # Return the processed DataFrame as Excel file
     except Exception as e:
         # return str(e)  # Return the error message
-        error_message = f"Error processing file: {str(e)}"
-        print(error_message)  # Log the error
-        return error_message, "Santanu Banerjee" # Return the error message to the user

     # Starting the Pipeline for Domain Extraction
     # Apply the text_processing_for_domain function to the DataFrame
     processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
     # Domain Clustering
     domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
+    try:
+        domain_df, optimal_n_clusters = extract_problem_domains(df, method='tfidf_kmeans')
+        print(f"Optimal clusters: {optimal_clusters}")
+        print(result_df.head())
+    except Exception as e:
+        print(f"Error in extract_problem_domains: {e}")
     # problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
     # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
     # return processed_df
+    return domain_df, "NLP Pipeline"
 def process_excel(file):
+    console_messages = []
     try:
         # Ensure the file path is correct
+        console_messages.append("Reading the uploaded Excel file...")
         file_path = file.name if hasattr(file, 'name') else file
         # Read the Excel file
         df = pd.read_excel(file_path)
         # Process the DataFrame
+        console_messages.append("Processing the DataFrame...")
+        result_df, console_messages = nlp_pipeline(df)
         # output_file = "Output_ProjectProposals.xlsx"
         output_file = "Output_Proposals.xlsx"
         result_df.to_excel(output_file, index=False)
+        console_messages.append("Processing completed. Ready for download.")
+        return output_file, "\n".join(console_messages)  # Return the processed DataFrame as Excel file
     except Exception as e:
         # return str(e)  # Return the error message
+        # error_message = f"Error processing file: {str(e)}"
+        # print(error_message)  # Log the error
+        console_messages.append(f"Error during processing: {str(e)}")
+        # return error_message, "Santanu Banerjee" # Return the error message to the user
+        return None, "\n".join(console_messages)