Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
4 |
-
def data_pre_processing(file_responses):
|
|
|
5 |
# Financial weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
|
6 |
|
7 |
try: # Define the columns to be processed
|
@@ -62,11 +63,14 @@ def data_pre_processing(file_responses):
|
|
62 |
|
63 |
|
64 |
# A different return value can be used to check the processing
|
|
|
65 |
# return file_responses
|
66 |
-
return merged_dataset
|
67 |
|
68 |
except Exception as e:
|
69 |
-
|
|
|
|
|
70 |
|
71 |
|
72 |
|
@@ -225,8 +229,8 @@ def extract_problem_domains(df,
|
|
225 |
text_column='Problem_Description',
|
226 |
cluster_range=(10, 50),
|
227 |
top_words=17,
|
228 |
-
|
229 |
-
method='tfidf_kmeans'
|
230 |
):
|
231 |
|
232 |
|
@@ -317,11 +321,12 @@ def extract_problem_domains(df,
|
|
317 |
|
318 |
|
319 |
|
320 |
-
|
321 |
-
|
322 |
-
|
|
|
323 |
# Data Preprocessing
|
324 |
-
processed_df = data_pre_processing(original_df) # merged_dataset
|
325 |
|
326 |
|
327 |
# Starting the Pipeline for Domain Extraction
|
@@ -344,10 +349,9 @@ def nlp_pipeline(original_df):
|
|
344 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
345 |
|
346 |
|
347 |
-
|
348 |
-
|
349 |
# return processed_df
|
350 |
-
return domain_df,
|
351 |
|
352 |
|
353 |
def process_excel(file):
|
@@ -362,7 +366,7 @@ def process_excel(file):
|
|
362 |
|
363 |
# Process the DataFrame
|
364 |
console_messages.append("Processing the DataFrame...")
|
365 |
-
result_df, console_messages = nlp_pipeline(df)
|
366 |
|
367 |
# output_file = "Output_ProjectProposals.xlsx"
|
368 |
output_file = "Output_Proposals.xlsx"
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
4 |
+
def data_pre_processing(file_responses, console_messages):
|
5 |
+
console_messages.append("Starting data pre-processing...")
|
6 |
# Financial weights can be anything (ultimately the row-wise weights are aggregated and the corresponding fractions are obtained from that row's total tax paid)
|
7 |
|
8 |
try: # Define the columns to be processed
|
|
|
63 |
|
64 |
|
65 |
# A different return value can be used to check the processing
|
66 |
+
console_messages.append("Data pre-processing completed.")
|
67 |
# return file_responses
|
68 |
+
return merged_dataset, console_messages
|
69 |
|
70 |
except Exception as e:
|
71 |
+
console_messages.append(f"Error during data pre-processing: {str(e)}")
|
72 |
+
# return str(e), console_messages
|
73 |
+
return None, console_messages
|
74 |
|
75 |
|
76 |
|
|
|
229 |
text_column='Problem_Description',
|
230 |
cluster_range=(10, 50),
|
231 |
top_words=17,
|
232 |
+
method='sentence_transformers'
|
233 |
+
# method='tfidf_kmeans'
|
234 |
):
|
235 |
|
236 |
|
|
|
321 |
|
322 |
|
323 |
|
324 |
+
# def nlp_pipeline(original_df):
|
325 |
+
def nlp_pipeline(original_df, console_messages):
|
326 |
+
console_messages.append("Starting NLP pipeline...")
|
327 |
+
|
328 |
# Data Preprocessing
|
329 |
+
processed_df, console_messages = data_pre_processing(original_df, console_messages) # merged_dataset
|
330 |
|
331 |
|
332 |
# Starting the Pipeline for Domain Extraction
|
|
|
349 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
350 |
|
351 |
|
352 |
+
console_messages.append("NLP pipeline completed.")
|
|
|
353 |
# return processed_df
|
354 |
+
return domain_df, console_messages
|
355 |
|
356 |
|
357 |
def process_excel(file):
|
|
|
366 |
|
367 |
# Process the DataFrame
|
368 |
console_messages.append("Processing the DataFrame...")
|
369 |
+
result_df, console_messages = nlp_pipeline(df, console_messages)
|
370 |
|
371 |
# output_file = "Output_ProjectProposals.xlsx"
|
372 |
output_file = "Output_Proposals.xlsx"
|