Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -234,7 +234,7 @@ def extract_problem_domains(df,
|
|
234 |
|
235 |
# console_messages.append("Returning from Problem Domain Extraction function.")
|
236 |
console_messages.append("Problem Domain Extraction completed.")
|
237 |
-
return df, optimal_n_clusters
|
238 |
|
239 |
|
240 |
|
@@ -321,7 +321,7 @@ def extract_location_clusters(df,
|
|
321 |
|
322 |
df = df.drop(text_column, axis=1)
|
323 |
console_messages.append("Location Clustering completed.")
|
324 |
-
return df, optimal_n_clusters
|
325 |
|
326 |
|
327 |
|
@@ -434,7 +434,7 @@ def nlp_pipeline(original_df):
|
|
434 |
|
435 |
# Domain Clustering
|
436 |
try:
|
437 |
-
processed_df, optimal_n_clusters = extract_problem_domains(processed_df)
|
438 |
console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
|
439 |
except Exception as e:
|
440 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
@@ -449,7 +449,7 @@ def nlp_pipeline(original_df):
|
|
449 |
|
450 |
# Location Clustering
|
451 |
try:
|
452 |
-
processed_df, optimal_n_clusters = extract_location_clusters(processed_df)
|
453 |
console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
|
454 |
except Exception as e:
|
455 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
@@ -462,13 +462,13 @@ def nlp_pipeline(original_df):
|
|
462 |
# Create cluster dataframes
|
463 |
budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
|
464 |
|
465 |
-
# Generate project proposals
|
466 |
-
location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
467 |
-
problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
468 |
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
469 |
|
470 |
console_messages.append("NLP pipeline completed.")
|
471 |
-
return processed_df, budget_cluster_df, problem_cluster_df, project_proposals
|
472 |
|
473 |
|
474 |
|
@@ -488,11 +488,16 @@ def process_excel(file):
|
|
488 |
try:
|
489 |
# Process the DataFrame
|
490 |
console_messages.append("Processing the DataFrame...")
|
491 |
-
processed_df, budget_cluster_df, problem_cluster_df, project_proposals = nlp_pipeline(df)
|
492 |
|
493 |
output_filename = "OutPut_PPs.xlsx"
|
494 |
with pd.ExcelWriter(output_filename) as writer:
|
495 |
-
project_proposals
|
|
|
|
|
|
|
|
|
|
|
496 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
497 |
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
498 |
processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
|
|
|
234 |
|
235 |
# console_messages.append("Returning from Problem Domain Extraction function.")
|
236 |
console_messages.append("Problem Domain Extraction completed.")
|
237 |
+
return df, optimal_n_clusters, cluster_representations
|
238 |
|
239 |
|
240 |
|
|
|
321 |
|
322 |
df = df.drop(text_column, axis=1)
|
323 |
console_messages.append("Location Clustering completed.")
|
324 |
+
return df, optimal_n_clusters, cluster_representations
|
325 |
|
326 |
|
327 |
|
|
|
434 |
|
435 |
# Domain Clustering
|
436 |
try:
|
437 |
+
processed_df, optimal_n_clusters, problem_clusters = extract_problem_domains(processed_df)
|
438 |
console_messages.append(f"Optimal clusters for Domain extraction: {optimal_n_clusters}")
|
439 |
except Exception as e:
|
440 |
console_messages.append(f"Error in extract_problem_domains: {str(e)}")
|
|
|
449 |
|
450 |
# Location Clustering
|
451 |
try:
|
452 |
+
processed_df, optimal_n_clusters, location_clusters = extract_location_clusters(processed_df)
|
453 |
console_messages.append(f"Optimal clusters for Location extraction: {optimal_n_clusters}")
|
454 |
except Exception as e:
|
455 |
console_messages.append(f"Error in extract_location_clusters: {str(e)}")
|
|
|
462 |
# Create cluster dataframes
|
463 |
budget_cluster_df, problem_cluster_df = create_cluster_dataframes(processed_df)
|
464 |
|
465 |
+
# Generate project proposals
|
466 |
+
# location_clusters = dict(enumerate(processed_df['Location_Category_Words'].unique()))
|
467 |
+
# problem_clusters = dict(enumerate(processed_df['Problem_Category_Words'].unique()))
|
468 |
project_proposals = create_project_proposals(budget_cluster_df, problem_cluster_df, location_clusters, problem_clusters)
|
469 |
|
470 |
console_messages.append("NLP pipeline completed.")
|
471 |
+
return processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters
|
472 |
|
473 |
|
474 |
|
|
|
488 |
try:
|
489 |
# Process the DataFrame
|
490 |
console_messages.append("Processing the DataFrame...")
|
491 |
+
processed_df, budget_cluster_df, problem_cluster_df, project_proposals, location_clusters, problem_clusters = nlp_pipeline(df)
|
492 |
|
493 |
output_filename = "OutPut_PPs.xlsx"
|
494 |
with pd.ExcelWriter(output_filename) as writer:
|
495 |
+
# Convert project_proposals dictionary to DataFrame
|
496 |
+
project_proposals_df = pd.DataFrame.from_dict(project_proposals, orient='index', columns=['Solutions Proposed'])
|
497 |
+
project_proposals_df.index.names = ['Location_Cluster', 'Problem_Cluster']
|
498 |
+
project_proposals_df.reset_index(inplace=True)
|
499 |
+
project_proposals_df.to_excel(writer, sheet_name='Project_Proposals', index=False)
|
500 |
+
|
501 |
budget_cluster_df.to_excel(writer, sheet_name='Financial_Weights')
|
502 |
problem_cluster_df.to_excel(writer, sheet_name='Problem_Descriptions')
|
503 |
processed_df.to_excel(writer, sheet_name='Input_Processed', index=False)
|