SantanuBanerjee committed on
Commit
e636253
·
verified ·
1 Parent(s): 0c700cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -327,9 +327,18 @@ def nlp_pipeline(original_df):
327
  # Starting the Pipeline for Domain Extraction
328
  # Apply the text_processing_for_domain function to the DataFrame
329
  processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
 
 
330
  # Domain Clustering
331
  domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
332
 
 
 
 
 
 
 
 
333
 
334
  # problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
335
  # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
@@ -338,30 +347,37 @@ def nlp_pipeline(original_df):
338
 
339
 
340
  # return processed_df
341
- return domain_df
342
 
343
 
344
  def process_excel(file):
 
 
345
  try:
346
  # Ensure the file path is correct
 
347
  file_path = file.name if hasattr(file, 'name') else file
348
  # Read the Excel file
349
  df = pd.read_excel(file_path)
350
 
351
  # Process the DataFrame
352
- result_df = nlp_pipeline(df)
 
353
 
354
  # output_file = "Output_ProjectProposals.xlsx"
355
  output_file = "Output_Proposals.xlsx"
356
  result_df.to_excel(output_file, index=False)
357
-
358
- return output_file, "Santanu Banerjee" # Return the processed DataFrame as Excel file
 
359
 
360
  except Exception as e:
361
  # return str(e) # Return the error message
362
- error_message = f"Error processing file: {str(e)}"
363
- print(error_message) # Log the error
364
- return error_message, "Santanu Banerjee" # Return the error message to the user
 
 
365
 
366
 
367
 
 
327
  # Starting the Pipeline for Domain Extraction
328
  # Apply the text_processing_for_domain function to the DataFrame
329
  processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
330
+
331
+
332
  # Domain Clustering
333
  domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
334
 
335
+ try:
336
+ domain_df, optimal_n_clusters = extract_problem_domains(df, method='tfidf_kmeans')
337
+ print(f"Optimal clusters: {optimal_clusters}")
338
+ print(result_df.head())
339
+ except Exception as e:
340
+ print(f"Error in extract_problem_domains: {e}")
341
+
342
 
343
  # problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
344
  # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
 
347
 
348
 
349
  # return processed_df
350
+ return domain_df, "NLP Pipeline"
351
 
352
 
353
  def process_excel(file):
354
+ console_messages = []
355
+
356
  try:
357
  # Ensure the file path is correct
358
+ console_messages.append("Reading the uploaded Excel file...")
359
  file_path = file.name if hasattr(file, 'name') else file
360
  # Read the Excel file
361
  df = pd.read_excel(file_path)
362
 
363
  # Process the DataFrame
364
+ console_messages.append("Processing the DataFrame...")
365
+ result_df, console_messages = nlp_pipeline(df)
366
 
367
  # output_file = "Output_ProjectProposals.xlsx"
368
  output_file = "Output_Proposals.xlsx"
369
  result_df.to_excel(output_file, index=False)
370
+
371
+ console_messages.append("Processing completed. Ready for download.")
372
+ return output_file, "\n".join(console_messages) # Return the processed DataFrame as Excel file
373
 
374
  except Exception as e:
375
  # return str(e) # Return the error message
376
+ # error_message = f"Error processing file: {str(e)}"
377
+ # print(error_message) # Log the error
378
+ console_messages.append(f"Error during processing: {str(e)}")
379
+ # return error_message, "Santanu Banerjee" # Return the error message to the user
380
+ return None, "\n".join(console_messages)
381
 
382
 
383