Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -327,9 +327,18 @@ def nlp_pipeline(original_df):
|
|
327 |
# Starting the Pipeline for Domain Extraction
|
328 |
# Apply the text_processing_for_domain function to the DataFrame
|
329 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
|
|
|
|
330 |
# Domain Clustering
|
331 |
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
# problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
|
335 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
@@ -338,30 +347,37 @@ def nlp_pipeline(original_df):
|
|
338 |
|
339 |
|
340 |
# return processed_df
|
341 |
-
return domain_df
|
342 |
|
343 |
|
344 |
def process_excel(file):
|
|
|
|
|
345 |
try:
|
346 |
# Ensure the file path is correct
|
|
|
347 |
file_path = file.name if hasattr(file, 'name') else file
|
348 |
# Read the Excel file
|
349 |
df = pd.read_excel(file_path)
|
350 |
|
351 |
# Process the DataFrame
|
352 |
-
|
|
|
353 |
|
354 |
# output_file = "Output_ProjectProposals.xlsx"
|
355 |
output_file = "Output_Proposals.xlsx"
|
356 |
result_df.to_excel(output_file, index=False)
|
357 |
-
|
358 |
-
|
|
|
359 |
|
360 |
except Exception as e:
|
361 |
# return str(e) # Return the error message
|
362 |
-
error_message = f"Error processing file: {str(e)}"
|
363 |
-
print(error_message) # Log the error
|
364 |
-
|
|
|
|
|
365 |
|
366 |
|
367 |
|
|
|
327 |
# Starting the Pipeline for Domain Extraction
|
328 |
# Apply the text_processing_for_domain function to the DataFrame
|
329 |
processed_df['Processed_ProblemDescription_forDomainExtraction'] = processed_df['Problem_Description'].apply(text_processing_for_domain)
|
330 |
+
|
331 |
+
|
332 |
# Domain Clustering
|
333 |
domain_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
334 |
|
335 |
+
try:
|
336 |
+
domain_df, optimal_n_clusters = extract_problem_domains(df, method='tfidf_kmeans')
|
337 |
+
print(f"Optimal clusters: {optimal_clusters}")
|
338 |
+
print(result_df.head())
|
339 |
+
except Exception as e:
|
340 |
+
print(f"Error in extract_problem_domains: {e}")
|
341 |
+
|
342 |
|
343 |
# problem_clusters, problem_model = perform_clustering(processed_df['Problem_Description'], n_clusters=10)
|
344 |
# location_clusters, location_model = perform_clustering(processed_df['Geographical_Location'], n_clusters=5)
|
|
|
347 |
|
348 |
|
349 |
# return processed_df
|
350 |
+
return domain_df, "NLP Pipeline"
|
351 |
|
352 |
|
353 |
def process_excel(file):
    """Run the NLP pipeline on an uploaded Excel file and save the result.

    Args:
        file: Either a path to the Excel file, or an object exposing the
            path through a ``name`` attribute (presumably an upload handle
            from the UI layer -- confirm against the caller).

    Returns:
        A ``(output_file, log)`` tuple. ``output_file`` is the path of the
        written workbook, or ``None`` if any step failed. ``log`` is the
        newline-joined list of progress/error messages collected so far.
    """
    console_messages = []

    try:
        console_messages.append("Reading the uploaded Excel file...")
        # Accept both plain paths and objects that carry the path in `.name`.
        file_path = file.name if hasattr(file, 'name') else file
        df = pd.read_excel(file_path)

        console_messages.append("Processing the DataFrame...")
        # nlp_pipeline returns (DataFrame, message). Bind the second value to
        # its own name: unpacking it into `console_messages` would replace
        # the list with a string and make every later `.append` raise,
        # including the one in the error handler below.
        result_df, pipeline_messages = nlp_pipeline(df)
        if isinstance(pipeline_messages, str):
            console_messages.append(pipeline_messages)
        elif pipeline_messages:
            console_messages.extend(str(message) for message in pipeline_messages)

        output_file = "Output_Proposals.xlsx"
        result_df.to_excel(output_file, index=False)

        console_messages.append("Processing completed. Ready for download.")
        return output_file, "\n".join(console_messages)

    except Exception as e:
        # Boundary handler: report the failure through the log instead of
        # propagating, so the caller always receives a (file, log) pair.
        console_messages.append(f"Error during processing: {str(e)}")
        return None, "\n".join(console_messages)
|
381 |
|
382 |
|
383 |
|