SantanuBanerjee committed on
Commit
b446f1b
·
verified ·
1 Parent(s): e636253

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  import pandas as pd
3
 
4
- def data_pre_processing(file_responses):
 
5
  # Financial Weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
6
 
7
  try: # Define the columns to be processed
@@ -62,11 +63,14 @@ def data_pre_processing(file_responses):
62
 
63
 
64
  # Different return can be used to check the processing
 
65
  # return file_responses
66
- return merged_dataset
67
 
68
  except Exception as e:
69
- return str(e)
 
 
70
 
71
 
72
 
@@ -225,8 +229,8 @@ def extract_problem_domains(df,
225
  text_column='Problem_Description',
226
  cluster_range=(10, 50),
227
  top_words=17,
228
- # method='sentence_transformers'
229
- method='tfidf_kmeans'
230
  ):
231
 
232
 
@@ -317,11 +321,12 @@ def extract_problem_domains(df,
317
 
318
 
319
 
320
-
321
-
322
- def nlp_pipeline(original_df):
 
323
  # Data Preprocessing
324
- processed_df = data_pre_processing(original_df) # merged_dataset
325
 
326
 
327
  # Starting the Pipeline for Domain Extraction
@@ -344,10 +349,9 @@ def nlp_pipeline(original_df):
344
  # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
345
 
346
 
347
-
348
-
349
  # return processed_df
350
- return domain_df, "NLP Pipeline"
351
 
352
 
353
  def process_excel(file):
@@ -362,7 +366,7 @@ def process_excel(file):
362
 
363
  # Process the DataFrame
364
  console_messages.append("Processing the DataFrame...")
365
- result_df, console_messages = nlp_pipeline(df)
366
 
367
  # output_file = "Output_ProjectProposals.xlsx"
368
  output_file = "Output_Proposals.xlsx"
 
1
  import gradio as gr
2
  import pandas as pd
3
 
4
+ def data_pre_processing(file_responses, console_messages):
5
+ console_messages.append("Starting data pre-processing...")
6
  # Financial Weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
7
 
8
  try: # Define the columns to be processed
 
63
 
64
 
65
  # Different return can be used to check the processing
66
+ console_messages.append("Data pre-processing completed.")
67
  # return file_responses
68
+ return merged_dataset, console_messages
69
 
70
  except Exception as e:
71
+ console_messages.append(f"Error during data pre-processing: {str(e)}")
72
+ # return str(e), console_messages
73
+ return None, console_messages
74
 
75
 
76
 
 
229
  text_column='Problem_Description',
230
  cluster_range=(10, 50),
231
  top_words=17,
232
+ method='sentence_transformers'
233
+ # method='tfidf_kmeans'
234
  ):
235
 
236
 
 
321
 
322
 
323
 
324
+ # def nlp_pipeline(original_df):
325
+ def nlp_pipeline(original_df, console_messages):
326
+ console_messages.append("Starting NLP pipeline...")
327
+
328
  # Data Preprocessing
329
+ processed_df, console_messages = data_pre_processing(original_df, console_messages) # merged_dataset
330
 
331
 
332
  # Starting the Pipeline for Domain Extraction
 
349
  # location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
350
 
351
 
352
+ console_messages.append("NLP pipeline completed.")
 
353
  # return processed_df
354
+ return domain_df, console_messages
355
 
356
 
357
  def process_excel(file):
 
366
 
367
  # Process the DataFrame
368
  console_messages.append("Processing the DataFrame...")
369
+ result_df, console_messages = nlp_pipeline(df, console_messages)
370
 
371
  # output_file = "Output_ProjectProposals.xlsx"
372
  output_file = "Output_Proposals.xlsx"